Compare commits

..

19 Commits

Author SHA1 Message Date
09407b08ba UnicodeConverter - restore icu 55.1 builds for iMac 2018-04-06 11:40:57 +03:00
83bc1b18b6 fix RtfFileReader for iMac 2018-04-05 19:49:20 +03:00
566eeb06ae x2t version 2.4.526 2018-04-05 19:03:28 +03:00
f5c359650f x2t - fix build 2018-04-05 18:23:06 +03:00
3c93563027 . 2018-04-05 17:43:59 +03:00
d5b80e2fa3 x2t - fix https://github.com/ONLYOFFICE/Docker-DocumentServer/issues/87#issuecomment-374490389 2018-04-05 17:34:39 +03:00
2bc2039c01 . 2018-04-05 17:28:17 +03:00
a0fdb1cbcc PptFormatReader - fix shape geometry in files from newest ms office 2018-04-05 17:27:02 +03:00
664007983d x2t fix mac build 2018-04-05 17:14:05 +03:00
a15aee3244 v5.1.1 2018-04-05 15:55:18 +03:00
bf8bea5d07 Merge pull request #63 from ONLYOFFICE/hotfix/v5.1.1_fix_users_files
x2t -fix users files
2018-04-02 17:19:50 +03:00
d9bd1cdb32 x2t -fix users files 2018-04-02 17:14:46 +03:00
e1a8bd3b3d [draw] Fix gradient bug 2018-03-30 13:59:50 +03:00
a8f7a1f044 [ios][x2t] fixed build 2018-03-30 10:56:08 +03:00
5c43b5743d [changelog] Add changelog
Create CHANGELOG.md for hotfix/v5.1.1
2018-03-29 18:01:22 +03:00
5ae24e7e32 . 2018-03-28 13:08:56 +03:00
cc69c8c1b4 x2t - fix bug #37323 2018-03-28 13:03:56 +03:00
d0b2acb6b3 v5.1.0 2018-03-28 12:06:40 +03:00
44491bbbb4 v5.0.7 2018-01-15 15:18:16 +03:00
1047 changed files with 482835 additions and 2717 deletions

View File

@ -2026,11 +2026,19 @@ namespace DocFileFormat
nElemSize = 4;
bTruncated = true;
}
if (nElemSize == 0)
{
nElemSize = 2; //enredobar.doc
}
long dwSize = nElems * nElemSize;
if (opGuides->op - 6 != (dwSize))
{
bool b = false;
if (nElems > 0x7fff)
{
dwSize = (opGuides->op - 6);
}
}
int count = dwSize / nElemSize; //1x (int or short)
for (int i = 0; i < count; ++i)
@ -2070,12 +2078,19 @@ namespace DocFileFormat
nElemSize = 4;
bTruncated = true;
}
if (nElemSize == 0)
{
nElemSize = 2; //enredobar.doc
}
long dwSize = nElems * nElemSize;
if (opLocs->op - 6 != (dwSize))
{
bool b = false;
if (nElems > 0x7fff)
{
dwSize = (opLocs->op - 6);
}
}
int count = dwSize / nElemSize; //2x (int or short)

View File

@ -3013,6 +3013,7 @@ namespace BinDocxRW
OOX::Logic::CSectionProperty* pSectPr;
OOX::Logic::CBackground * pBackground;
OOX::CDocument* poDocument;
OOX::JsaProject* pJsaProject;
bool m_bWriteSectPr;//Записывать ли свойства верхнего уровня в данном экземпляре BinaryOtherTableWriter
//---------------------------------
@ -3022,6 +3023,7 @@ namespace BinDocxRW
pBackground = NULL;
pSectPr = NULL;
poDocument = NULL;
pJsaProject = NULL;
m_bWriteSectPr = false;
}
void WriteVbaProject(OOX::VbaProject& oVbaProject)
@ -3120,6 +3122,19 @@ namespace BinDocxRW
m_oBcw.WriteItemEnd(nCurPos);
}
}
//Write JsaProject
if (NULL != pJsaProject)
{
BYTE* pData = NULL;
DWORD nBytesCount;
if(NSFile::CFileBinary::ReadAllBytes(pJsaProject->filename().GetPath(), &pData, nBytesCount))
{
nCurPos = m_oBcw.WriteItemStart(c_oSerParType::JsaProject);
m_oBcw.m_oStream.WriteBYTEArray(pData, nBytesCount);
m_oBcw.WriteItemEnd(nCurPos);
RELEASEARRAYOBJECTS(pData);
}
}
}
void WriteBackground (OOX::Logic::CBackground* pBackground)
{
@ -7964,6 +7979,7 @@ namespace BinDocxRW
oBinaryDocumentTableWriter.pSectPr = pFirstSectPr;
oBinaryDocumentTableWriter.pBackground = oDocx.m_pDocument->m_oBackground.GetPointer();
oBinaryDocumentTableWriter.poDocument = oDocx.m_pDocument;
oBinaryDocumentTableWriter.pJsaProject = oDocx.m_pJsaProject;
oBinaryDocumentTableWriter.m_bWriteSectPr = true;
@ -7974,18 +7990,6 @@ namespace BinDocxRW
oBinaryDocumentTableWriter.WriteVbaProject(*oDocx.m_pVbaProject);
this->WriteTableEnd(nCurPos);
}
//Write JsaProject
if (NULL != oDocx.m_pJsaProject)
{
BYTE* pData = NULL;
DWORD nBytesCount;
if(NSFile::CFileBinary::ReadAllBytes(oDocx.m_pJsaProject->filename().GetPath(), &pData, nBytesCount))
{
nCurPos = m_oBcw.WriteItemStart(c_oSerParType::JsaProject);
m_oBcw.m_oStream.WriteBYTEArray(pData, nBytesCount);
m_oBcw.WriteItemEnd(nCurPos);
}
}
// Write content
nCurPos = this->WriteTableStart(BinDocxRW::c_oSerTableTypes::Document);
@ -7993,7 +7997,7 @@ namespace BinDocxRW
this->WriteTableEnd(nCurPos);
nCurPos = this->WriteTableStart(BinDocxRW::c_oSerTableTypes::HdrFtr);
oBinaryHeaderFooterTableWriter.Write();
oBinaryHeaderFooterTableWriter.Write();
this->WriteTableEnd(nCurPos);
if(NULL != m_oParamsWriter.m_poTheme)

View File

@ -60,7 +60,7 @@ void _mediaitems::add_or_find(const std::wstring & oox_ref, Type type, std::wst
{
std::wstring output_sub_path;
std::wstring output_fileName;
int number=0;
size_t number = 0;
if (type == typeImage)
{

View File

@ -1101,7 +1101,7 @@ void odf_drawing_context::end_frame()
/////////////////////
void odf_drawing_context::start_element(office_element_ptr elm, office_element_ptr style_elm)
{
int level = (int)impl_->current_level_.size();
size_t level = (int)impl_->current_level_.size();
if (impl_->current_level_.size() > 0 && elm)
impl_->current_level_.back()->add_child_element(elm);

View File

@ -490,7 +490,7 @@ void odf_lists_styles_context::set_numeric_format(std::wstring val)
}
if (r1 < 0) return;//??
r1 = s.find(L"%", 1);
r1 = (int)s.find(L"%", 1);
if (r1 >0)
{
int level = 0;
@ -498,7 +498,7 @@ void odf_lists_styles_context::set_numeric_format(std::wstring val)
int r2=0;
while (r2 < s.length())
{
r2 = s.find(L"%", r2);
r2 = (int)s.find(L"%", r2);
if (r2 >=0) level++;
else break;
r2++;

View File

@ -545,9 +545,9 @@ void odf_number_styles_context::create_currency_style(number_format_state & stat
{
create_element(L"number", L"currency-style", root_elm, odf_context_);
{
int res1 = state.format_code[0].rfind(L"]");
int res2 = state.format_code[0].rfind(L"#");
int res3 = state.format_code[0].rfind(L"0");
int res1 = (int)state.format_code[0].rfind(L"]");
int res2 = (int)state.format_code[0].rfind(L"#");
int res3 = (int)state.format_code[0].rfind(L"0");
office_element_ptr elm_symbol;
create_element(L"number", L"currency-symbol", elm_symbol, odf_context_);

View File

@ -135,7 +135,7 @@ void odf_master_state::add_header(office_element_ptr & elm)
///////////////////////////////////////////////////////////////////////////////
odf_layout_state::odf_layout_state(office_element_ptr & layout_elm )
{
int level =0;
size_t level = 0;
odf_element_state state = {layout_elm,L"",office_element_ptr(),level};
elements_.push_back(state);

View File

@ -400,11 +400,11 @@ bool ods_table_state::is_cell_comment()
int ods_table_state::is_cell_hyperlink(int col, int row)
{
for (size_t i=0; i < hyperlinks_.size();i++)
for (size_t i = 0; i < hyperlinks_.size(); i++)
{
if (hyperlinks_[i].col == col && hyperlinks_[i].row == row)
{
return i;
return (int)i;
}
}
return -1;
@ -415,7 +415,7 @@ int ods_table_state::is_cell_comment(int col, int row, short repeate_col)
{
if ((comments_[i].col < col + repeate_col && comments_[i].col >= col) && comments_[i].row == row)
{
return i;
return (int)i;
}
}
return -1;

View File

@ -200,7 +200,7 @@ void odt_conversion_context::add_text_content(const std::wstring & text)
{
if (drop_cap_state_.enabled)
{
int count = text.length();
int count = (int)text.length();
drop_cap_state_.characters += count;
style_text_properties * props = text_context()->get_text_properties();

View File

@ -66,10 +66,10 @@ public:
virtual void serialize(std::wostream & _Wostream);
text_text(const std::wstring & Text) : text_(Text) {};
text_text() {};
text_text(const std::wstring & Text) : text_(Text) {}
text_text() {}
std::wstring & attr_text() { return text_; };
std::wstring & attr_text() { return text_; }
virtual void add_text(const std::wstring & Text);
@ -263,7 +263,7 @@ public:
public:
text_reference_mark_start() {}
text_reference_mark_start(const std::wstring & Name) : text_name_(Name){};
text_reference_mark_start(const std::wstring & Name) : text_name_(Name){}
virtual void create_child_element(const std::wstring & Ns, const std::wstring & Name){}
virtual void add_child_element( const office_element_ptr & child_element){}

View File

@ -175,6 +175,7 @@ SOURCES += \
../Reader/SlidePersist.cpp \
../Records/Animations/AnimationTypes.cpp \
../Records/Drawing/ArtBlip.cpp \
../Records/Drawing/ShapeContainer.cpp \
../PPTXWriter/Converter.cpp \
../PPTXWriter/ShapeWriter.cpp
}

View File

@ -37,6 +37,7 @@
#include "../Reader/SlidePersist.cpp"
#include "../Records/Animations/AnimationTypes.cpp"
#include "../Records/Drawing/ArtBlip.cpp"
#include "../Records/Drawing/ShapeContainer.cpp"
#include "../PPTXWriter/Converter.cpp"
#include "../PPTXWriter/ShapeWriter.cpp"

View File

@ -285,115 +285,128 @@ void NSPresentationEditor::CPPTXWriter::WriteApp(CFile& oFile)
{
CP_XML_ATTR(L"xmlns", L"http://schemas.openxmlformats.org/officeDocument/2006/extended-properties");
CP_XML_ATTR(L"xmlns:vt", L"http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypess");
}
CP_XML_NODE(L"Application")
{
CP_XML_STREAM() << 0;
}
#if defined(INTVER)
CP_XML_NODE(L"AppVersion")
{
CP_XML_NODE(L"TotalTime")
{
CP_XML_STREAM() << 0;
}
CP_XML_NODE(L"Words")
{
CP_XML_STREAM() << 0;
}
std::wstring sApplication = L"ONLYOFFICE";
#if defined(INTVER)
std::string s = VALUE2STR(INTVER);
CP_XML_STREAM() << std::wstring(s.begin(), s.end());
}
#endif
CP_XML_NODE(L"TotalTime")
{
CP_XML_STREAM() << 0;
}
CP_XML_NODE(L"Words")
{
CP_XML_STREAM() << 0;
}
CP_XML_NODE(L"PresentationFormat")
{
CP_XML_STREAM() << L"On-screen Show (4:3)";
}
CP_XML_NODE(L"Paragraphs")
{
CP_XML_STREAM() << 0;
}
CP_XML_NODE(L"Slides")
{
CP_XML_STREAM() << m_pDocument->m_arSlides.size();
}
CP_XML_NODE(L"Notes")
{
CP_XML_STREAM() << m_pDocument->m_arNotes.size();
}
CP_XML_NODE(L"HiddenSlides")
{
CP_XML_STREAM() << 0;
}
CP_XML_NODE(L"MMClips")
{
CP_XML_STREAM() << 2;
}
CP_XML_NODE(L"ScaleCrop")
{
CP_XML_STREAM() << L"false";
}
CP_XML_NODE(L"HeadingPairs")
{
CP_XML_NODE(L"vt:vector")
sApplication += L"/" + std::wstring(s.begin(), s.end());
#endif
CP_XML_NODE(L"Application")
{
CP_XML_ATTR(L"size", 4);
CP_XML_ATTR(L"baseType", L"variant");
CP_XML_NODE(L"vt:variant")
{
CP_XML_ATTR(L"vt:lpstr", L"Theme");
}
CP_XML_NODE(L"vt:variant")
{
CP_XML_ATTR(L"vt:i4", m_pDocument->m_arThemes.size());
}
CP_XML_NODE(L"vt:variant")
{
CP_XML_ATTR(L"vt:lpstr", L"Slide Titles");
}
CP_XML_NODE(L"vt:variant")
{
CP_XML_ATTR(L"vt:i4", m_pDocument->m_arSlides.size());
}
CP_XML_STREAM() << sApplication;
}
}
CP_XML_NODE(L"TitlesOfParts")
{
CP_XML_NODE(L"vt:vector")
//CP_XML_NODE(L"AppVersion")
//{
// CP_XML_STREAM() << L"1.0";
//}
CP_XML_NODE(L"Paragraphs")
{
CP_XML_ATTR(L"size", m_pDocument->m_arSlides.size() + m_pDocument->m_arThemes.size());
CP_XML_ATTR(L"baseType", L"lpstr");
for (size_t i = 1; i <= m_pDocument->m_arThemes.size(); ++i)
{
CP_XML_NODE(L"vt:lpstr")
{
CP_XML_STREAM() << L"Theme " << i;
}
}
for (size_t i = 1; i <= m_pDocument->m_arSlides.size(); ++i)
{
CP_XML_NODE(L"vt:lpstr")
{
CP_XML_STREAM() << L"Slide " << i;
}
}
CP_XML_STREAM() << 0;
}
}
//CP_XML_NODE(L"PresentationFormat")
//{
// CP_XML_STREAM() << L"On-screen Show (4:3)";
//}
//CP_XML_NODE(L"Slides")
//{
// CP_XML_STREAM() << m_pDocument->m_arSlides.size();
//}
//CP_XML_NODE(L"Notes")
//{
// CP_XML_STREAM() << m_pDocument->m_arNotes.size();
//}
//CP_XML_NODE(L"HiddenSlides")
//{
// CP_XML_STREAM() << 0;
//}
//CP_XML_NODE(L"MMClips")
//{
// CP_XML_STREAM() << 0;
//}
//CP_XML_NODE(L"ScaleCrop")
//{
// CP_XML_STREAM() << L"false";
//}
//CP_XML_NODE(L"HeadingPairs")
//{
// CP_XML_NODE(L"vt:vector")
// {
// CP_XML_ATTR(L"size", 4);
// CP_XML_ATTR(L"baseType", L"variant");
//
// CP_XML_NODE(L"vt:variant")
// {
// CP_XML_NODE(L"vt:lpstr")
// {
// CP_XML_STREAM() << L"Theme";
// }
// }
// CP_XML_NODE(L"vt:variant")
// {
// CP_XML_NODE(L"vt:i4")
// {
// CP_XML_STREAM() << m_pDocument->m_arThemes.size();
// }
// }
// CP_XML_NODE(L"vt:variant")
// {
// CP_XML_NODE(L"vt:lpstr")
// {
// CP_XML_STREAM() << L"Slide Titles";
// }
// }
// CP_XML_NODE(L"vt:variant")
// {
// CP_XML_NODE(L"vt:i4")
// CP_XML_STREAM() << m_pDocument->m_arSlides.size();
// }
// }
//}
//CP_XML_NODE(L"TitlesOfParts")
//{
// CP_XML_NODE(L"vt:vector")
// {
// CP_XML_ATTR(L"size", m_pDocument->m_arSlides.size() + m_pDocument->m_arThemes.size());
// CP_XML_ATTR(L"baseType", L"lpstr");
CP_XML_NODE(L"Company");
CP_XML_NODE(L"LinksUpToDate")
{
CP_XML_STREAM() << L"false";
}
CP_XML_NODE(L"SharedDoc")
{
CP_XML_STREAM() << L"false";
}
CP_XML_NODE(L"HyperlinksChanged")
{
CP_XML_STREAM() << L"false";
// for (size_t i = 1; i <= m_pDocument->m_arThemes.size(); ++i)
// {
// CP_XML_NODE(L"vt:lpstr")
// {
// CP_XML_STREAM() << L"Theme " << i;
// }
// }
// for (size_t i = 1; i <= m_pDocument->m_arSlides.size(); ++i)
// {
// CP_XML_NODE(L"vt:lpstr")
// {
// CP_XML_STREAM() << L"Slide " << i;
// }
// }
// }
//}
////CP_XML_NODE(L"Company");
//CP_XML_NODE(L"LinksUpToDate")
//{
// CP_XML_STREAM() << L"false";
//}
//CP_XML_NODE(L"SharedDoc")
//{
// CP_XML_STREAM() << L"false";
//}
//CP_XML_NODE(L"HyperlinksChanged")
//{
// CP_XML_STREAM() << L"false";
//}
}
}
oFile.WriteStringUTF8(L"<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>");
@ -577,14 +590,14 @@ void NSPresentationEditor::CPPTXWriter::WriteThemes()
for (size_t i = 0; i < m_pDocument->m_arThemes.size(); i++)
{
WriteTheme(&m_pDocument->m_arThemes[i], nIndexTheme, nStartLayout, 1);
WriteTheme(&m_pDocument->m_arThemes[i], nIndexTheme, nStartLayout);
}
WriteTheme(m_pDocument->m_pNotesMaster, nIndexTheme, nStartLayout, 2);
WriteTheme(m_pDocument->m_pHandoutMaster, nIndexTheme, nStartLayout, 3);
WriteTheme(m_pDocument->m_pNotesMaster, nIndexTheme, nStartLayout);
WriteTheme(m_pDocument->m_pHandoutMaster, nIndexTheme, nStartLayout);
}
void NSPresentationEditor::CPPTXWriter::WriteTheme(CTheme* pTheme, int & nIndexTheme, int & nStartLayout, int Type)
void NSPresentationEditor::CPPTXWriter::WriteTheme(CTheme* pTheme, int & nIndexTheme, int & nStartLayout)
{
if (!pTheme) return;
@ -665,15 +678,15 @@ void NSPresentationEditor::CPPTXWriter::WriteTheme(CTheme* pTheme, int & nIndexT
CStringWriter oWriter;
oWriter.WriteString(L"<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\" ?>");
if (Type == 1)
if (pTheme->m_eType == typeMaster)
{
oWriter.WriteString(L"<p:sldMaster xmlns:a=\"http://schemas.openxmlformats.org/drawingml/2006/main\" xmlns:r=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships\" xmlns:p=\"http://schemas.openxmlformats.org/presentationml/2006/main\">");
}
else if (Type == 2)
else if (pTheme->m_eType == typeNotesMaster)
{
oWriter.WriteString(L"<p:notesMaster xmlns:a=\"http://schemas.openxmlformats.org/drawingml/2006/main\" xmlns:r=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships\" xmlns:p=\"http://schemas.openxmlformats.org/presentationml/2006/main\">");
}
else if (Type == 3)
else if (pTheme->m_eType == typeHandoutMaster)
{
oWriter.WriteString(L"<p:handoutMaster xmlns:a=\"http://schemas.openxmlformats.org/drawingml/2006/main\" xmlns:r=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships\" xmlns:p=\"http://schemas.openxmlformats.org/presentationml/2006/main\">");
}
@ -688,12 +701,22 @@ void NSPresentationEditor::CPPTXWriter::WriteTheme(CTheme* pTheme, int & nIndexT
for (size_t nEl = 0; nEl < pTheme->m_arElements.size(); ++nEl)
{
if (!pTheme->m_arElements[nEl]) continue;
if (isBodyPlaceholder(pTheme->m_arElements[nEl]->m_lPlaceholderType))
pTheme->m_arElements[nEl]->m_lPlaceholderType =100; //body тип прописывать !!
pTheme->m_arElements[nEl]->m_lPlaceholderType = 100; //body тип прописывать !!
if (pTheme->m_arElements[nEl]->m_bBoundsEnabled == false)
continue;
if (pTheme->m_eType == typeNotesMaster ||
pTheme->m_eType == typeHandoutMaster)
{
pTheme->m_arElements[nEl]->m_lPlaceholderID = -1;
if (pTheme->m_eType == typeHandoutMaster)
pTheme->m_arElements[nEl]->m_lPlaceholderSizePreset = -1;
}
WriteElement(oWriter, oRels, pTheme->m_arElements[nEl]);
}
@ -702,7 +725,7 @@ void NSPresentationEditor::CPPTXWriter::WriteTheme(CTheme* pTheme, int & nIndexT
std::wstring strOverrideColorScheme = _T("<p:clrMap bg1=\"lt1\" tx1=\"dk1\" bg2=\"lt2\" tx2=\"dk2\" accent1=\"accent1\" accent2=\"accent2\" accent3=\"accent3\" accent4=\"accent4\" accent5=\"accent5\" accent6=\"accent6\" hlink=\"hlink\" folHlink=\"folHlink\"/>");
oWriter.WriteString(strOverrideColorScheme);
if (Type == 1)
if (pTheme->m_eType == typeMaster)
{
oWriter.WriteString(std::wstring(L"<p:sldLayoutIdLst>"));
@ -726,7 +749,7 @@ void NSPresentationEditor::CPPTXWriter::WriteTheme(CTheme* pTheme, int & nIndexT
if (!pTheme->m_bHasFooter) oWriter.WriteString(std::wstring(L" ftr=\"0\""));
oWriter.WriteString(std::wstring(L"/>"));
}
if (Type == 1)
if (pTheme->m_eType == typeMaster)
{
oWriter.WriteString(std::wstring(L"<p:txStyles>"));
@ -744,7 +767,7 @@ void NSPresentationEditor::CPPTXWriter::WriteTheme(CTheme* pTheme, int & nIndexT
oWriter.WriteString(std::wstring(L"</p:txStyles>"));
}
else if (Type == 2)
else if (pTheme->m_eType == typeNotesMaster)
{
oWriter.WriteString(std::wstring(L"<p:notesStyle>"));
CStylesWriter::ConvertStyles(pTheme->m_pStyles[1], pTheme->m_oInfo, oWriter, 9);
@ -753,7 +776,8 @@ void NSPresentationEditor::CPPTXWriter::WriteTheme(CTheme* pTheme, int & nIndexT
std::wstring strSlideMasterFile;
std::wstring strSlideMasterRelsFile;
if (Type == 1)
if (pTheme->m_eType == typeMaster)
{
oWriter.WriteString(std::wstring(L"</p:sldMaster>"));
@ -763,7 +787,7 @@ void NSPresentationEditor::CPPTXWriter::WriteTheme(CTheme* pTheme, int & nIndexT
strSlideMasterRelsFile = L"slideMaster" + std::to_wstring(nIndexTheme + 1) + L".xml.rels";
strSlideMasterRelsFile = strPptDirectory + _T("slideMasters") + FILE_SEPARATOR_STR + _T("_rels") + FILE_SEPARATOR_STR + strSlideMasterRelsFile;
}
else if (Type == 2)
else if (pTheme->m_eType == typeNotesMaster)
{
oWriter.WriteString(std::wstring(L"</p:notesMaster>"));
@ -773,7 +797,7 @@ void NSPresentationEditor::CPPTXWriter::WriteTheme(CTheme* pTheme, int & nIndexT
strSlideMasterRelsFile = L"notesMaster1.xml.rels";
strSlideMasterRelsFile = strPptDirectory + _T("notesMasters") + FILE_SEPARATOR_STR + _T("_rels") + FILE_SEPARATOR_STR + strSlideMasterRelsFile;
}
else if (Type == 3)
else if (pTheme->m_eType == typeHandoutMaster)
{
oWriter.WriteString(std::wstring(L"</p:handoutMaster>"));
@ -933,6 +957,11 @@ void NSPresentationEditor::CPPTXWriter::WriteLayout(CLayout& oLayout, int nIndex
size_t nElements = oLayout.m_arElements.size();
for (size_t nEl = 0; nEl < nElements; ++nEl)
{
if (!oLayout.m_arElements[nEl])
continue;
oLayout.m_arElements[nEl]->m_lPlaceholderSizePreset = -1;
WriteElement(oWriter, oRels, oLayout.m_arElements[nEl]);
}

View File

@ -72,7 +72,7 @@ namespace NSPresentationEditor
void WritePresInfo ();
void WriteAll ();
void WriteThemes ();
void WriteTheme (CTheme* pTheme, int & nIndexTheme, int & nStartLayout, int Type = 1);
void WriteTheme (CTheme* pTheme, int & nIndexTheme, int & nStartLayout);
void WriteSlides ();
void WriteNotes ();
void WriteLayout (CLayout& oLayout, int nIndexLayout, int nStartLayout, int nIndexTheme);

View File

@ -35,6 +35,11 @@
#include "../../../ASCOfficeXlsFile2/source/XlsXlsxConverter/ShapeType.h"
#include "../../../Common/MS-LCID.h"
#include "../../../ASCOfficePPTXFile/PPTXFormat/Logic/SpTreeElem.h"
#include "../../../ASCOfficePPTXFile/PPTXFormat/Logic/Shape.h"
#include "../../../ASCOfficePPTXFile/PPTXFormat/Logic/SpTree.h"
#ifndef EMU_MM
#define EMU_MM 36000.0
#endif
@ -307,6 +312,10 @@ bool NSPresentationEditor::CShapeWriter::SetElement(CElementPtr pElem)
m_bWordArt = false;
m_bTextBox = false;
m_xmlGeomAlternative.clear();
m_xmlTxBodyAlternative.clear();
m_xmlAlternative.clear();
if (m_pShapeElement)
{
m_pShapeElement->m_pShape->GetTextRect(m_oTextRect);
@ -811,6 +820,13 @@ void NSPresentationEditor::CShapeWriter::WriteTextInfo()
{
size_t nCount = m_pShapeElement->m_pShape->m_oText.m_arParagraphs.size();
if (false == m_xmlTxBodyAlternative.empty())
{
m_oWriter.WriteString(m_xmlTxBodyAlternative);
return;
}
m_oWriter.WriteString(std::wstring(L"<p:txBody>"));
m_oWriter.WriteString(std::wstring(L"<a:bodyPr" ));
@ -1227,6 +1243,59 @@ void NSPresentationEditor::CShapeWriter::WriteTextInfo()
m_oWriter.WriteString(str5);
}
void NSPresentationEditor::CShapeWriter::ParseXmlAlternative(const std::wstring & xml)
{
XmlUtils::CXmlLiteReader oReader;
if ( !oReader.FromString( xml ) )
return;
if ( !oReader.ReadNextNode() )
return;
std::wstring sName = XmlUtils::GetNameNoNS(oReader.GetName());
nullable<PPTX::Logic::SpTreeElem> oElement;
if ( L"graphicFrame" == sName || L"pic" == sName || L"sp" == sName || L"grpSp" == sName || L"cxnSp" == sName || L"AlternateContent" == sName
|| L"spTree" )
{
oElement = oReader;
}
if (oElement.IsInit())
{
smart_ptr<PPTX::Logic::Shape> shape = oElement->GetElem().smart_dynamic_cast<PPTX::Logic::Shape>();
if (shape.IsInit())
{
NSBinPptxRW::CXmlWriter writer(XMLWRITER_DOC_TYPE_PPTX);
shape->spPr.Geometry.toXmlWriter(&writer);
if (shape->spPr.scene3d.IsInit())
shape->spPr.scene3d->toXmlWriter(&writer);
if (shape->spPr.sp3d.IsInit())
shape->spPr.sp3d->toXmlWriter(&writer);
m_xmlGeomAlternative = writer.GetXmlString();
writer.ClearNoAttack();
if ((shape->txBody.IsInit()) && (shape->txBody->bodyPr.IsInit()) && (shape->txBody->bodyPr->prstTxWarp.IsInit()))
{//только WordArt
shape->txBody->toXmlWriter(&writer);
m_xmlTxBodyAlternative = writer.GetXmlString();
}
}
smart_ptr<PPTX::Logic::SpTree> groupShape = oElement->GetElem().smart_dynamic_cast<PPTX::Logic::SpTree>();
if (groupShape.IsInit())
{//smartArt
NSBinPptxRW::CXmlWriter writer(XMLWRITER_DOC_TYPE_PPTX);
groupShape->toXmlWriter(&writer);
m_xmlAlternative = writer.GetXmlString();
}
}
}
std::wstring NSPresentationEditor::CShapeWriter::ConvertShape()
{
if (m_pImageElement) return ConvertImage();
@ -1256,6 +1325,12 @@ std::wstring NSPresentationEditor::CShapeWriter::ConvertShape()
m_pShapeElement->m_bLine = true;
}
if (m_pShapeElement->m_pShape && !m_pShapeElement->m_pShape->m_strXmlString.empty())
{
ParseXmlAlternative(m_pShapeElement->m_pShape->m_strXmlString);
}
m_oWriter.WriteString(std::wstring(L"<p:sp>"));
WriteShapeInfo();
@ -1313,30 +1388,37 @@ std::wstring NSPresentationEditor::CShapeWriter::ConvertShape()
m_pShapeElement->m_pShape->ToRenderer(dynamic_cast<IRenderer*>(this), oInfo, m_oMetricInfo, 0.0, 1.0);
}
if ((prstGeom.empty() == false || m_pShapeElement->m_bShapePreset) && prstTxWarp.empty() && !shape->m_bCustomShape)
if (!m_xmlGeomAlternative.empty())
{
if (prstGeom.empty()) prstGeom = L"rect";
m_oWriter.WriteString(std::wstring(L"<a:prstGeom"));
{
m_oWriter.WriteString(std::wstring(L" prst=\"") + prstGeom + std::wstring(L"\">"));
if (!m_bWordArt)
{
m_oWriter.WriteString(std::wstring(L"<a:avLst/>"));
}
}
m_oWriter.WriteString(std::wstring(L"</a:prstGeom>"));
}
else if (prstTxWarp.empty())
{
m_oWriter.WriteString(m_pShapeElement->ConvertPPTShapeToPPTX());
m_oWriter.WriteString(m_xmlGeomAlternative);
}
else
{
//word art
m_oWriter.WriteString(std::wstring(L"<a:prstGeom prst=\"rect\"/>"));
if ((prstGeom.empty() == false || m_pShapeElement->m_bShapePreset) && prstTxWarp.empty() && !shape->m_bCustomShape)
{
if (prstGeom.empty()) prstGeom = L"rect";
m_oWriter.WriteString(std::wstring(L"<a:prstGeom"));
{
m_oWriter.WriteString(std::wstring(L" prst=\"") + prstGeom + std::wstring(L"\">"));
if (!m_bWordArt)
{
m_oWriter.WriteString(std::wstring(L"<a:avLst/>"));
}
}
m_oWriter.WriteString(std::wstring(L"</a:prstGeom>"));
}
else if (prstTxWarp.empty())
{
m_oWriter.WriteString(m_pShapeElement->ConvertPPTShapeToPPTX());
}
else
{
//word art
m_oWriter.WriteString(std::wstring(L"<a:prstGeom prst=\"rect\"/>"));
}
}
if (!m_bWordArt)
if (false == m_bWordArt)
{
m_oWriter.WriteString(ConvertBrush(m_pShapeElement->m_oBrush));
if (m_pShapeElement->m_bLine)

View File

@ -138,6 +138,8 @@ namespace NSPresentationEditor
class CShapeWriter : public IRenderer
{
private:
void ParseXmlAlternative(const std::wstring & xml);
NSPresentationEditor::CStringWriter m_oWriterPath;
NSPresentationEditor::CStringWriter m_oWriterVML;
NSPresentationEditor::CStringWriter m_oWriter;
@ -154,6 +156,10 @@ namespace NSPresentationEditor
bool m_bWordArt;
bool m_bTextBox;
std::wstring m_xmlGeomAlternative;
std::wstring m_xmlTxBodyAlternative;
std::wstring m_xmlAlternative;
public:
CShapeWriter();

View File

@ -1559,7 +1559,7 @@ void CPPTUserInfo::LoadMasters(const LONG& lOriginWidth, const LONG& lOriginHeig
{
std::map<DWORD, CRecordSlide*>::iterator pPair = m_mapNotesMasters.begin();
LoadMaster(pPair->second, m_pNotesMasterWrapper, m_pNotesMaster);
LoadMaster(typeNotesMaster, pPair->second, m_pNotesMasterWrapper, m_pNotesMaster);
}
LoadHandoutMasterFromPrevUsers(0);
@ -1567,10 +1567,10 @@ void CPPTUserInfo::LoadMasters(const LONG& lOriginWidth, const LONG& lOriginHeig
{
std::map<DWORD, CRecordSlide*>::iterator pPair = m_mapHandoutMasters.begin();
LoadMaster(pPair->second, m_pHandoutMasterWrapper, m_pHandoutMaster);
LoadMaster(typeHandoutMaster, pPair->second, m_pHandoutMasterWrapper, m_pHandoutMaster);
}
}
void CPPTUserInfo::LoadMaster(CRecordSlide* pMaster, CSlideInfo *& pMasterWrapper, CTheme *& pTheme)
void CPPTUserInfo::LoadMaster(_typeMaster type, CRecordSlide* pMaster, CSlideInfo *& pMasterWrapper, CTheme *& pTheme)
{
if (pMaster == NULL)
return;
@ -1609,7 +1609,7 @@ void CPPTUserInfo::LoadMaster(CRecordSlide* pMaster, CSlideInfo *& pMasterWrappe
}
}
pTheme = new CTheme();
pTheme = new CTheme(type);
pTheme->m_lOriginalWidth = lOriginWidth;
pTheme->m_lOriginalHeight = lOriginHeight;
@ -1970,8 +1970,8 @@ void CPPTUserInfo::LoadNoMainMaster(DWORD dwMasterID, const LONG& lOriginWidth,
{
if (pElement->m_lPlaceholderID >=0)
{
if (pElement->m_lPlaceholderType == MasterSlideNumber) pLayout->m_bHasSlideNumber = true;
if (pElement->m_lPlaceholderType == MasterDate) pLayout->m_bHasDate = true;
if (pElement->m_lPlaceholderType == MasterSlideNumber) pLayout->m_bHasSlideNumber = true;
if (pElement->m_lPlaceholderType == MasterDate) pLayout->m_bHasDate = true;
if (pElement->m_lPlaceholderType == MasterFooter) pLayout->m_bHasFooter = true;
}
pLayout->m_mapPlaceholders.insert(std::pair<int, int>(pElement->m_lPlaceholderType, pLayout->m_arElements.size()-1));

View File

@ -44,6 +44,7 @@ using namespace NSPresentationEditor;
class CPPTUserInfo : public CDocument
{
public:
CUserEdit m_oUser;
std::map<DWORD, DWORD> m_mapOffsetInPIDs;
CRecordDocument m_oDocument;
@ -145,7 +146,7 @@ public:
void LoadNoMainMaster (DWORD dwMasterID, const LONG& lOriginWidth, const LONG& lOriginHeight);
void LoadMainMaster (DWORD dwMasterID, const LONG& lOriginWidth, const LONG& lOriginHeight);
void LoadMaster(CRecordSlide* pMaster, CSlideInfo *& pMasterWrapper, CTheme *& pTheme);
void LoadMaster(_typeMaster type, CRecordSlide* pMaster, CSlideInfo *& pMasterWrapper, CTheme *& pTheme);
void LoadSlideFromPrevUsers (DWORD dwSlideID);
void LoadMasterFromPrevUsers (DWORD dwSlideID);

View File

@ -59,109 +59,7 @@ public:
{
}
virtual void ReadFromStream(SRecordHeader & oHeader, POLE::Stream* pStream)
{
CRecordsContainer::ReadFromStream(oHeader, pStream);
virtual void ReadFromStream(SRecordHeader & oHeader, POLE::Stream* pStream);
// вот... а теперь нужно взять и узнать перерасчет системы координат
std::vector<CRecordShapeContainer*> oArrayShapes;
GetRecordsByType(&oArrayShapes, false, false);
if (!oArrayShapes.empty())
oArrayShapes[0]->bGroupShape = true;//тут описание самой группы
int nIndexBreak = -1;
for (size_t nIndex = 0; nIndex < oArrayShapes.size(); ++nIndex)
{
std::vector<CRecordGroupShape*> oArrayGroupShapes;
oArrayShapes[nIndex]->GetRecordsByType(&oArrayGroupShapes, false, true);
if ( oArrayGroupShapes.size() > 0 )
{
m_rcGroupBounds.left = oArrayGroupShapes[0]->m_oBounds.left;
m_rcGroupBounds.top = oArrayGroupShapes[0]->m_oBounds.top;
m_rcGroupBounds.right = oArrayGroupShapes[0]->m_oBounds.right;
m_rcGroupBounds.bottom = oArrayGroupShapes[0]->m_oBounds.bottom;
std::vector<CRecordClientAnchor*> oArrayClients;
oArrayShapes[nIndex]->GetRecordsByType(&oArrayClients, false, true);
if ( oArrayClients.size() > 0)
{
m_rcGroupClientAnchor.left = (LONG)oArrayClients[0]->m_oBounds.Left;
m_rcGroupClientAnchor.top = (LONG)oArrayClients[0]->m_oBounds.Top;
m_rcGroupClientAnchor.right = (LONG)oArrayClients[0]->m_oBounds.Right;
m_rcGroupClientAnchor.bottom = (LONG)oArrayClients[0]->m_oBounds.Bottom;
}
else
{
std::vector<CRecordChildAnchor*> oArrayChilds;
oArrayShapes[nIndex]->GetRecordsByType(&oArrayChilds, false, true);
if ( oArrayChilds.size() > 0)
{
m_rcGroupClientAnchor.left = (LONG)oArrayChilds[0]->m_oBounds.left;
m_rcGroupClientAnchor.top = (LONG)oArrayChilds[0]->m_oBounds.top;
m_rcGroupClientAnchor.right = (LONG)oArrayChilds[0]->m_oBounds.right;
m_rcGroupClientAnchor.bottom = (LONG)oArrayChilds[0]->m_oBounds.bottom;
}
}
nIndexBreak = nIndex;
break;
}
}
LONG lW1 = m_rcGroupBounds.right - m_rcGroupBounds.left;
LONG lH1 = m_rcGroupBounds.bottom - m_rcGroupBounds.top;
LONG lW2 = m_rcGroupClientAnchor.right - m_rcGroupClientAnchor.left;
LONG lH2 = m_rcGroupClientAnchor.bottom - m_rcGroupClientAnchor.top;
bool bIsRecalc = ((lW1 > 0) && (lH1 > 0) && (lW2 > 0) && (lH2 > 0));
if (bIsRecalc)
{
for (size_t nIndex = 0; nIndex < oArrayShapes.size(); ++nIndex)
{
if (nIndex != nIndexBreak)
{
oArrayShapes[nIndex]->m_pGroupBounds = &m_rcGroupBounds;
oArrayShapes[nIndex]->m_pGroupClientAnchor = &m_rcGroupClientAnchor;
}
}
}
}
void SetGroupRect()
{
std::vector<CRecordGroupShapeContainer*> oArrayGroupContainer;
this->GetRecordsByType(&oArrayGroupContainer, false, false);
int nCountGroups = oArrayGroupContainer.size();
for (int i = 0; i < nCountGroups; ++i)
{
LONG lWidthGroup = m_rcGroupBounds.right - m_rcGroupBounds.left;
LONG lHeightGroup = m_rcGroupBounds.bottom - m_rcGroupBounds.top;
LONG lWidthClient = m_rcGroupClientAnchor.right - m_rcGroupClientAnchor.left;
LONG lHeightClient = m_rcGroupClientAnchor.bottom - m_rcGroupClientAnchor.top;
bool bIsRecalc = ((lWidthClient > 0) && (lHeightClient > 0) && (lWidthGroup > 0) && (lHeightGroup > 0));
if (bIsRecalc)
{
// здесь переводим координаты, чтобы они не зависили от группы
double dScaleX = (double)(lWidthClient) / (lWidthGroup);
double dScaleY = (double)(lHeightClient) / (lHeightGroup);
RECT* prcChildAnchor = &oArrayGroupContainer[i]->m_rcGroupClientAnchor;
prcChildAnchor->left = m_rcGroupClientAnchor.left + (LONG)(dScaleX * (prcChildAnchor->left - m_rcGroupBounds.left));
prcChildAnchor->right = m_rcGroupClientAnchor.left + (LONG)(dScaleX * (prcChildAnchor->right - m_rcGroupBounds.left));
prcChildAnchor->top = m_rcGroupClientAnchor.top + (LONG)(dScaleY * (prcChildAnchor->top - m_rcGroupBounds.top));
prcChildAnchor->bottom = m_rcGroupClientAnchor.top + (LONG)(dScaleY * (prcChildAnchor->bottom - m_rcGroupBounds.top));
}
oArrayGroupContainer[i]->SetGroupRect();
}
}
void SetGroupRect();
};

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -341,138 +341,6 @@
<References>
</References>
<Files>
<Filter
Name="Common"
>
<File
RelativePath="..\..\..\ASCOfficeXlsFile2\source\XlsFormat\Binary\CFRecord.cpp"
>
</File>
<File
RelativePath="..\..\..\ASCOfficeXlsFile2\source\XlsFormat\Binary\CFStream.cpp"
>
</File>
<File
RelativePath="..\..\..\XlsxSerializerCom\Reader\ChartFromToBinary.cpp"
>
</File>
<File
RelativePath="..\..\..\ASCOfficeDocxFile2\DocWrapper\ChartWriter.cpp"
>
</File>
<File
RelativePath="..\..\..\XlsxSerializerCom\Common\Common.cpp"
>
</File>
<File
RelativePath="..\..\..\XlsxSerializerCom\Reader\CommonWriter.cpp"
>
</File>
<File
RelativePath="..\..\..\XlsxSerializerCom\Reader\CSVReader.cpp"
>
</File>
<File
RelativePath="..\..\..\XlsxSerializerCom\Writer\CSVWriter.cpp"
>
</File>
<File
RelativePath="..\..\..\ASCOfficeDocxFile2\DocWrapper\DocxSerializer.cpp"
>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCLCompilerTool"
AdditionalOptions="/bigobj"
/>
</FileConfiguration>
<FileConfiguration
Name="Debug|x64"
>
<Tool
Name="VCCLCompilerTool"
AdditionalOptions="/bigobj"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|x64"
>
<Tool
Name="VCCLCompilerTool"
AdditionalOptions="/bigobj"
/>
</FileConfiguration>
</File>
<File
RelativePath="..\..\..\Common\FileDownloader\FileDownloader_win.cpp"
>
</File>
<File
RelativePath="..\..\..\DesktopEditor\fontengine\FontManager.cpp"
>
<FileConfiguration
Name="Release|x64"
>
<Tool
Name="VCCLCompilerTool"
AdditionalOptions="/bigobj"
/>
</FileConfiguration>
</File>
<File
RelativePath="..\..\..\ASCOfficeDocxFile2\DocWrapper\FontProcessor.cpp"
>
</File>
<File
RelativePath="..\..\..\UnicodeConverter\UnicodeConverter.cpp"
>
</File>
<File
RelativePath="..\..\..\ASCOfficeDocxFile2\DocWrapper\XlsxSerializer.cpp"
>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCLCompilerTool"
AdditionalOptions="/bigobj"
/>
</FileConfiguration>
<FileConfiguration
Name="Debug|x64"
>
<Tool
Name="VCCLCompilerTool"
AdditionalOptions="/bigobj"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|x64"
>
<Tool
Name="VCCLCompilerTool"
AdditionalOptions="/bigobj"
/>
</FileConfiguration>
</File>
<File
RelativePath="..\..\..\ASCOfficePPTXFile\Editor\xmlwriter.h"
>
</File>
<Filter
Name="pole"
>
<File
RelativePath="..\..\..\Common\3dParty\pole\pole.cpp"
>
</File>
<File
RelativePath="..\..\..\Common\3dParty\pole\pole.h"
>
</File>
</Filter>
</Filter>
<Filter
Name="Reader"
>
@ -935,6 +803,10 @@
RelativePath="..\Records\Drawing\Shape.h"
>
</File>
<File
RelativePath="..\Records\Drawing\ShapeContainer.cpp"
>
</File>
<File
RelativePath="..\Records\Drawing\ShapeContainer.h"
>

View File

@ -68,14 +68,6 @@ EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libpsd", "..\..\DesktopEditor\cximage\libpsd\libpsd_vs2005.vcproj", "{9A037A69-D1DF-4505-AB2A-6CB3641C476E}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "PPTFormatLib", "..\PPTFormatLib\Win32\PPTFormatLib.vcproj", "{7B27E40E-F70A-4A74-A77C-0944D7931D15}"
ProjectSection(ProjectDependencies) = postProject
{21663823-DE45-479B-91D0-B4FEF4916EF0} = {21663823-DE45-479B-91D0-B4FEF4916EF0}
{9CAA294E-58C3-4CEB-ABA0-CB9786CA5540} = {9CAA294E-58C3-4CEB-ABA0-CB9786CA5540}
{37CA072A-5BDE-498B-B3A7-5E404F5F9BF2} = {37CA072A-5BDE-498B-B3A7-5E404F5F9BF2}
{C739151F-5384-41DF-A1A6-F089E2C1AD56} = {C739151F-5384-41DF-A1A6-F089E2C1AD56}
{A100103A-353E-45E8-A9B8-90B87CC5C0B0} = {A100103A-353E-45E8-A9B8-90B87CC5C0B0}
{36636678-AE25-4BE6-9A34-2561D1BCF302} = {36636678-AE25-4BE6-9A34-2561D1BCF302}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "jbig", "..\..\DesktopEditor\cximage\jbig\jbig_vs2005.vcproj", "{764C3A2D-FB0F-428E-B1C7-62D1DD2CE239}"
EndProject
@ -85,15 +77,15 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "OfficeUtilsLib", "..\..\Off
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "PptFormatTest", "PptFormatTest.vcproj", "{0F49D5D1-A8D3-4F97-8BC1-E2F65BB00C10}"
ProjectSection(ProjectDependencies) = postProject
{7B27E40E-F70A-4A74-A77C-0944D7931D15} = {7B27E40E-F70A-4A74-A77C-0944D7931D15}
{3423EC9A-52E4-4A4D-9753-EDEBC38785EF} = {3423EC9A-52E4-4A4D-9753-EDEBC38785EF}
{C27E9A9F-3A17-4482-9C5F-BF15C01E747C} = {C27E9A9F-3A17-4482-9C5F-BF15C01E747C}
{37CA072A-5BDE-498B-B3A7-5E404F5F9BF2} = {37CA072A-5BDE-498B-B3A7-5E404F5F9BF2}
{9CAA294E-58C3-4CEB-ABA0-CB9786CA5540} = {9CAA294E-58C3-4CEB-ABA0-CB9786CA5540}
{36636678-AE25-4BE6-9A34-2561D1BCF302} = {36636678-AE25-4BE6-9A34-2561D1BCF302}
{F8274B05-168E-4D6E-B843-AA7510725363} = {F8274B05-168E-4D6E-B843-AA7510725363}
{21663823-DE45-479B-91D0-B4FEF4916EF0} = {21663823-DE45-479B-91D0-B4FEF4916EF0}
{3423EC9A-52E4-4A4D-9753-EDEBC38785EF} = {3423EC9A-52E4-4A4D-9753-EDEBC38785EF}
{77DDC8D7-5B12-4FF2-9629-26AEBCA8436D} = {77DDC8D7-5B12-4FF2-9629-26AEBCA8436D}
{C27E9A9F-3A17-4482-9C5F-BF15C01E747C} = {C27E9A9F-3A17-4482-9C5F-BF15C01E747C}
{36636678-AE25-4BE6-9A34-2561D1BCF302} = {36636678-AE25-4BE6-9A34-2561D1BCF302}
{9CAA294E-58C3-4CEB-ABA0-CB9786CA5540} = {9CAA294E-58C3-4CEB-ABA0-CB9786CA5540}
{A100103A-353E-45E8-A9B8-90B87CC5C0B0} = {A100103A-353E-45E8-A9B8-90B87CC5C0B0}
{37CA072A-5BDE-498B-B3A7-5E404F5F9BF2} = {37CA072A-5BDE-498B-B3A7-5E404F5F9BF2}
{7B27E40E-F70A-4A74-A77C-0944D7931D15} = {7B27E40E-F70A-4A74-A77C-0944D7931D15}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libxml2", "..\..\DesktopEditor\xml\build\vs2005\libxml2.vcproj", "{21663823-DE45-479B-91D0-B4FEF4916EF0}"

View File

@ -44,7 +44,7 @@
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="../../DesktopEditor/xml/build/vs2005;../../DesktopEditor/xml/libxml2/include"
AdditionalIncludeDirectories="../../DesktopEditor/xml/build/vs2005;../../DesktopEditor/xml/libxml2/include;&quot;../../DesktopEditor/freetype-2.5.2/include&quot;"
PreprocessorDefinitions="_DEBUG;_CONSOLE;_USE_MATH_DEFINES;_USE_LIBXML2_READER_;LIBXML_READER_ENABLED;USE_LITE_READER;_USE_XMLLITE_READER_;_PRESENTATION_WRITER_;_SVG_CONVERT_TO_IMAGE_;DONT_WRITE_EMBEDDED_FONTS"
MinimalRebuild="true"
BasicRuntimeChecks="3"

View File

@ -82,6 +82,7 @@ public:
LONG m_lDrawType;
std::wstring m_strPPTXShape;
std::wstring m_strXmlString; //alternative
NSBaseShape::ClassType m_classType;

View File

@ -39,7 +39,14 @@ typedef std::vector<std::wstring> vector_string;
namespace NSPresentationEditor
{
static void CorrectColorPPT(LONG& lSchemeIndex)
enum _typeMaster
{
typeMaster,
typeNotesMaster,
typeHandoutMaster
};
static void CorrectColorPPT(LONG& lSchemeIndex)
{
//0x00 //Background color
//0x01 //Text color

View File

@ -40,6 +40,7 @@ namespace NSPresentationEditor
class CTheme
{
public:
_typeMaster m_eType;
std::map<_UINT64, LONG> m_mapGeomToLayout;// типовые шаблоны
std::map<DWORD, LONG> m_mapTitleLayout; // заголовочные шаблоны
@ -77,8 +78,8 @@ namespace NSPresentationEditor
vector_string m_PlaceholdersReplaceString[3]; //0-dates, 1 - headers, 2 - footers
//------------------------------------------------------------------------------------
CTheme() : m_arColorScheme(), m_arFonts(), m_arBrushes(),
m_arPens(), m_arEffects(), m_arLayouts()
CTheme(_typeMaster type = typeMaster) : m_arColorScheme(), m_arFonts(), m_arBrushes(),
m_arPens(), m_arEffects(), m_arLayouts(), m_eType(type)
{
Clear();
}
@ -90,6 +91,7 @@ namespace NSPresentationEditor
CTheme& operator=(const CTheme& oSrc)
{
m_eType = oSrc.m_eType;
m_arColorScheme = oSrc.m_arColorScheme;
m_arFonts = oSrc.m_arFonts;
m_arBrushes = oSrc.m_arBrushes;
@ -106,7 +108,10 @@ namespace NSPresentationEditor
m_bHasFooter = oSrc.m_bHasFooter;
m_nFormatDate = oSrc.m_nFormatDate;
for (size_t i = 0 ; i < 3 ; i++) m_PlaceholdersReplaceString[i] = oSrc.m_PlaceholdersReplaceString[i];
for (size_t i = 0 ; i < 3 ; i++)
{
m_PlaceholdersReplaceString[i] = oSrc.m_PlaceholdersReplaceString[i];
}
for (size_t i = 0; i < oSrc.m_arExtraColorScheme.size(); ++i)
{

View File

@ -341,7 +341,32 @@ namespace PPTX
{
pReader->Seek(pReader->GetPos() - 4); //roll back to size record
std::wstring sXmlContent;
pReader->m_pMainDocument->getXmlContentElem(OOX::et_m_oMathPara, *pReader, sXmlContent);
if (pReader->m_pMainDocument)
{
pReader->m_pMainDocument->getXmlContentElem(OOX::et_m_oMathPara, *pReader, sXmlContent);
}
else
{
BinDocxRW::CDocxSerializer oDocxSerializer;
NSBinPptxRW::CDrawingConverter oDrawingConverter;
NSBinPptxRW::CBinaryFileReader* old_reader = oDrawingConverter.m_pReader;
NSBinPptxRW::CRelsGenerator* old_rels = pReader->m_pRels;
oDrawingConverter.m_pReader = pReader;
pReader->m_pRels = new NSBinPptxRW::CRelsGenerator();
oDrawingConverter.SetMainDocument(&oDocxSerializer);
oDocxSerializer.m_pCurFileWriter = new Writers::FileWriter(sDstEmbeddedTemp, L"", false, 111, false, &oDrawingConverter, L"");
oDocxSerializer.getXmlContentElem(OOX::et_m_oMathPara, *pReader, sXmlContent);
pReader->m_pRels = old_rels;
oDrawingConverter.m_pReader = old_reader;
pReader->m_pMainDocument = NULL;
}
if (!sXmlContent.empty())
{

View File

@ -776,7 +776,7 @@ public:
sResult.erase(outsize_with_0 - 1);
ansi = false;
}
#else
#elif defined(__linux__)
std::string sCodepage = "CP" + std::to_string(nCodepage);
iconv_t ic= iconv_open("WCHAR_T", sCodepage.c_str());

View File

@ -70,6 +70,13 @@ namespace XLS
L"333399", L"000000", L"FFFFFF", L"FF0000", L"00FF00", L"0000FF", L"FFFF00", L"FF00FF", L"00FFFF", L"800000",
L"800000", L"800000", L"808000", L"800080", L"808000", L"C0C0C0", L"808080", L"9999FF", L"993366", L"FFFFCC",
L"CCFFFF", L"660066", L"FF8080", L"0066CC", L"CCCCFF",
//todoooo - подглядеть какие в мс далее
L"800000", L"FF00FF", L"FFFF00", L"00FFFF", L"800080", L"800000", L"808000", L"0000FF", L"00CCFF", L"CCFFFF",
L"CCFFCC", L"FFFF99", L"99CCFF", L"FF99CC", L"CC99FF", L"FFCC99", L"3366FF", L"33CCCC", L"99CC00", L"FFCC00",
L"FF9900", L"FF6600", L"666699", L"969696", L"336600", L"339966", L"330000", L"333300", L"993300", L"993366",
L"333399", L"000000", L"FFFFFF", L"FF0000", L"00FF00", L"0000FF", L"FFFF00", L"FF00FF", L"00FFFF", L"800000",
L"800000", L"800000", L"808000", L"800080", L"808000", L"C0C0C0", L"808080", L"9999FF", L"993366", L"FFFFCC",
L"CCFFFF", L"660066", L"FF8080", L"0066CC", L"CCCCFF"
};

4
CHANGELOG.md Normal file
View File

@ -0,0 +1,4 @@
# Change log
## 5.1.1
### x2t
*

View File

@ -51,6 +51,7 @@ SOURCES += \
../Source/DocxFormat/Logic/TableProperty.cpp \
../Source/DocxFormat/Logic/Vml.cpp \
../Source/DocxFormat/Media/VbaProject.cpp \
../Source/DocxFormat/Media/JsaProject.cpp \
../Source/DocxFormat/Math/oMath.cpp \
../Source/DocxFormat/Math/oMathContent.cpp \
../Source/DocxFormat/Math/oMathPara.cpp \

View File

@ -49,6 +49,7 @@
#include "../Source/DocxFormat/Math/oMathContent.cpp"
#include "../Source/DocxFormat/Math/oMathPara.cpp"
#include "../Source/DocxFormat/Media/VbaProject.cpp"
#include "../Source/DocxFormat/Media/JsaProject.cpp"
#include "../Source/DocxFormat/Docx.cpp"
#include "../Source/DocxFormat/FileFactory.cpp"
#include "../Source/DocxFormat/IFileContainer.cpp"

View File

@ -214,6 +214,8 @@
690FE0831E9BBA23004B26D0 /* DiagramDrawing.h in Headers */ = {isa = PBXBuildFile; fileRef = 690FE0811E9BBA23004B26D0 /* DiagramDrawing.h */; };
691C3E131F20C3D500F1775E /* File.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 691C3E121F20C3D500F1775E /* File.cpp */; };
69B3ADA920120093000EC6A7 /* VmlDrawing.h in Headers */ = {isa = PBXBuildFile; fileRef = 69B3ADA820120093000EC6A7 /* VmlDrawing.h */; };
69DE2217206E24A700A07A0E /* JsaProject.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 69DE2215206E24A700A07A0E /* JsaProject.cpp */; };
69DE2218206E24A700A07A0E /* JsaProject.h in Headers */ = {isa = PBXBuildFile; fileRef = 69DE2216206E24A700A07A0E /* JsaProject.h */; };
69E6AC872031AB0C00795D9D /* Xlsx.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 69E6AC862031AB0C00795D9D /* Xlsx.cpp */; };
69E6AC892031AC3500795D9D /* SheetData.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 69E6AC882031AC3500795D9D /* SheetData.cpp */; };
69E6AC8C2031ACA900795D9D /* VbaProject.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 69E6AC8A2031ACA900795D9D /* VbaProject.cpp */; };
@ -438,6 +440,8 @@
690FE0811E9BBA23004B26D0 /* DiagramDrawing.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = DiagramDrawing.h; sourceTree = "<group>"; };
691C3E121F20C3D500F1775E /* File.cpp */ = {isa = PBXFileReference; explicitFileType = sourcecode.cpp.objcpp.preprocessed; fileEncoding = 4; path = File.cpp; sourceTree = "<group>"; };
69B3ADA820120093000EC6A7 /* VmlDrawing.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = VmlDrawing.h; sourceTree = "<group>"; };
69DE2215206E24A700A07A0E /* JsaProject.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = JsaProject.cpp; sourceTree = "<group>"; };
69DE2216206E24A700A07A0E /* JsaProject.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = JsaProject.h; sourceTree = "<group>"; };
69E6AC862031AB0C00795D9D /* Xlsx.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = Xlsx.cpp; sourceTree = "<group>"; };
69E6AC882031AC3500795D9D /* SheetData.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = SheetData.cpp; sourceTree = "<group>"; };
69E6AC8A2031ACA900795D9D /* VbaProject.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = VbaProject.cpp; sourceTree = "<group>"; };
@ -706,6 +710,8 @@
17E6A0891AC4262700F28F8B /* Video.h */,
69E6AC8A2031ACA900795D9D /* VbaProject.cpp */,
69E6AC8B2031ACA900795D9D /* VbaProject.h */,
69DE2215206E24A700A07A0E /* JsaProject.cpp */,
69DE2216206E24A700A07A0E /* JsaProject.h */,
);
path = Media;
sourceTree = "<group>";
@ -1072,6 +1078,7 @@
17C1FC711ACC4250006B99B3 /* Colors.h in Headers */,
17C1FC721ACC4250006B99B3 /* Pos.h in Headers */,
17C1FC731ACC4250006B99B3 /* CommonInclude.h in Headers */,
69DE2218206E24A700A07A0E /* JsaProject.h in Headers */,
17C1FC741ACC4250006B99B3 /* WebSettings.h in Headers */,
17C1FC751ACC4250006B99B3 /* CustomXml.h in Headers */,
17C1FC771ACC4250006B99B3 /* IFileContainer.h in Headers */,
@ -1189,6 +1196,7 @@
17C1FBB21ACC4250006B99B3 /* Vml.cpp in Sources */,
17C1FBB31ACC4250006B99B3 /* unicode_util.cpp in Sources */,
17C1FBB41ACC4250006B99B3 /* FldSimple.cpp in Sources */,
69DE2217206E24A700A07A0E /* JsaProject.cpp in Sources */,
69E6AC872031AB0C00795D9D /* Xlsx.cpp in Sources */,
17C1FBB91ACC4250006B99B3 /* Paragraph.cpp in Sources */,
17C1FBBA1ACC4250006B99B3 /* SimpleTypes_Word.cpp in Sources */,

View File

@ -1197,6 +1197,14 @@
RelativePath="..\Source\DocxFormat\Media\Image.h"
>
</File>
<File
RelativePath="..\Source\DocxFormat\Media\JsaProject.cpp"
>
</File>
<File
RelativePath="..\Source\DocxFormat\Media\JsaProject.h"
>
</File>
<File
RelativePath="..\Source\DocxFormat\Media\Media.h"
>

View File

@ -0,0 +1,112 @@
/*
* (c) Copyright Ascensio System SIA 2010-2018
*
* This program is a free software product. You can redistribute it and/or
* modify it under the terms of the GNU Affero General Public License (AGPL)
* version 3 as published by the Free Software Foundation. In accordance with
* Section 7(a) of the GNU AGPL its Section 15 shall be amended to the effect
* that Ascensio System SIA expressly excludes the warranty of non-infringement
* of any third-party rights.
*
* This program is distributed WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. For
* details, see the GNU AGPL at: http://www.gnu.org/licenses/agpl-3.0.html
*
* You can contact Ascensio System SIA at Lubanas st. 125a-25, Riga, Latvia,
* EU, LV-1021.
*
* The interactive user interfaces in modified source and object code versions
* of the Program must display Appropriate Legal Notices, as required under
* Section 5 of the GNU AGPL version 3.
*
* Pursuant to Section 7(b) of the License you must retain the original Product
* logo when distributing the program. Pursuant to Section 7(e) we decline to
* grant you any rights under trademark law for use of our trademarks.
*
* All the Product's GUI elements, including illustrations and icon sets, as
* well as technical writing content are licensed under the terms of the
* Creative Commons Attribution-ShareAlike 4.0 International. See the License
* terms at http://creativecommons.org/licenses/by-sa/4.0/legalcode
*
*/
#include "JsaProject.h"
#include "../Docx.h"
#include "../../XlsxFormat/Xlsx.h"
#include "Media.h"
#include "../../../../../ASCOfficePPTXFile/Editor/BinaryFileReaderWriter.h"
#include "../../../../../ASCOfficePPTXFile/Editor/imagemanager.h"
#include "../IFileContainer.h"
#include "../../XlsxFormat/FileTypes_Spreadsheet.h"
namespace OOX
{
JsaProject::JsaProject( OOX::Document *pMain ) : Media(pMain)
{
OOX::CDocx* docx = dynamic_cast<OOX::CDocx*>(pMain);
if (docx)
{
docx->m_pJsaProject = this;
}
else
{
OOX::Spreadsheet::CXlsx* xlsx = dynamic_cast<OOX::Spreadsheet::CXlsx*>(pMain);
if (xlsx)
{
xlsx->m_pJsaProject = this;
}
}
}
JsaProject::JsaProject(OOX::Document *pMain, const CPath& filename) : Media(pMain)
{
OOX::CDocx* docx = dynamic_cast<OOX::CDocx*>(pMain);
if (docx)
{
docx->m_pJsaProject = this;
}
else
{
OOX::Spreadsheet::CXlsx* xlsx = dynamic_cast<OOX::Spreadsheet::CXlsx*>(pMain);
if (xlsx)
{
xlsx->m_pJsaProject = this;
}
}
read(filename);
}
void JsaProject::toPPTY(NSBinPptxRW::CBinaryFileWriter* pWriter) const
{
BYTE* pData = NULL;
DWORD nBytesCount;
if(NSFile::CFileBinary::ReadAllBytes(m_filename.GetPath(), &pData, nBytesCount))
{
pWriter->WriteBYTEArray(pData, nBytesCount);
}
}
void JsaProject::fromPPTY(NSBinPptxRW::CBinaryFileReader* pReader)
{
LONG _length = pReader->GetLong();
LONG _end_rec = pReader->GetPos() + _length;
if (_length > 0)
{
BYTE* pData = pReader->GetPointer(_length);
std::wstring filePath = pReader->m_pRels->m_pManager->GetDstFolder() + FILE_SEPARATOR_STR + OOX::FileTypes::JsaProject.DefaultFileName().GetPath();
NSFile::CFileBinary oFile;
oFile.CreateFileW(filePath);
oFile.WriteFile(pData, _length);
oFile.CloseFile();
pReader->m_pRels->m_pManager->m_pContentTypes->AddDefault(OOX::FileTypes::JsaProject.DefaultFileName().GetExtention(false));
set_filename(filePath, false);
}
pReader->Seek(_end_rec);
}
} // namespace OOX

View File

@ -34,6 +34,9 @@
#define OOX_ONLY_JSA_PROJECT_INCLUDE_H_
#include "Media.h"
#include "../../../../../ASCOfficePPTXFile/Editor/BinaryFileReaderWriter.h"
#include "../../../../../ASCOfficePPTXFile/Editor/imagemanager.h"
#include "../../XlsxFormat/FileTypes_Spreadsheet.h"
namespace OOX
@ -41,13 +44,8 @@ namespace OOX
class JsaProject : public Media
{
public:
JsaProject( OOX::Document *pMain ) : Media(pMain)
{
}
JsaProject(OOX::Document *pMain, const CPath& filename) : Media(pMain)
{
read(filename);
}
JsaProject( OOX::Document *pMain );
JsaProject(OOX::Document *pMain, const CPath& filename);
virtual ~JsaProject()
{
}
@ -63,36 +61,8 @@ namespace OOX
{
return type().DefaultFileName();
}
virtual void toPPTY(NSBinPptxRW::CBinaryFileWriter* pWriter) const
{
BYTE* pData = NULL;
DWORD nBytesCount;
if(NSFile::CFileBinary::ReadAllBytes(m_filename.GetPath(), &pData, nBytesCount))
{
pWriter->WriteBYTEArray(pData, nBytesCount);
}
}
virtual void fromPPTY(NSBinPptxRW::CBinaryFileReader* pReader)
{
LONG _length = pReader->GetLong();
LONG _end_rec = pReader->GetPos() + _length;
if (_length > 0)
{
BYTE* pData = pReader->GetPointer(_length);
std::wstring filePath = pReader->m_pRels->m_pManager->GetDstFolder() + FILE_SEPARATOR_STR + OOX::FileTypes::JsaProject.DefaultFileName().GetPath();
NSFile::CFileBinary oFile;
oFile.CreateFileW(filePath);
oFile.WriteFile(pData, _length);
oFile.CloseFile();
pReader->m_pRels->m_pManager->m_pContentTypes->AddDefault(OOX::FileTypes::JsaProject.DefaultFileName().GetExtention(false));
set_filename(filePath, false);
}
pReader->Seek(_end_rec);
}
virtual void toPPTY(NSBinPptxRW::CBinaryFileWriter* pWriter) const;
virtual void fromPPTY(NSBinPptxRW::CBinaryFileReader* pReader);
};
} // namespace OOX

View File

@ -72,6 +72,8 @@ public:
bool isPptFormatFile (POLE::Storage *storage);
bool isMS_OFFCRYPTOFormatFile (POLE::Storage * storage);
bool isDocFlatFormatFile (unsigned char* pBuffer,int dwBytes);
bool isRtfFormatFile (unsigned char* pBuffer,int dwBytes);
bool isHtmlFormatFile (unsigned char* pBuffer,int dwBytes, bool testCloseTag);
bool isPdfFormatFile (unsigned char* pBuffer,int dwBytes);

View File

@ -144,20 +144,29 @@ bool COfficeFileFormatChecker::isDocFormatFile (POLE::Storage * storage)
POLE::Stream stream(storage, L"WordDocument");
unsigned char buffer[10];
if (stream.read(buffer,10) > 0)
unsigned char buffer[64];
if (stream.read(buffer, 64) > 0)
{
//ms office 2007 encrypted contains stream WordDocument !!
std::list<std::wstring> entries = storage->entries(L"DataSpaces");
if (entries.size() > 0)
return false;
#if defined FILE_FORMAT_CHECKER_WITH_MACRO
if ((buffer[0] == 0xEC && buffer[1] == 0xA5) || // word 1997-2003
(buffer[0] == 0xDC && buffer[1] == 0xA5) || // word 1995
(buffer[0] == 0xDB && buffer[1] == 0xA5)) // word 2.0
{
nFileType = AVS_OFFICESTUDIO_FILE_DOCUMENT_DOC;
}
else if ( isHtmlFormatFile(buffer, 64, false) )
{
nFileType = AVS_OFFICESTUDIO_FILE_DOCUMENT_HTML_IN_CONTAINER;
return true;
}
if (storage->isDirectory(L"Macros"))
{
bMacroEnabled = true;
}
#endif
return true;
}
@ -193,14 +202,23 @@ bool COfficeFileFormatChecker::isXlsFormatFile (POLE::Storage * storage)
}
}
}
#if defined FILE_FORMAT_CHECKER_WITH_MACRO
if (storage->isDirectory(L"_VBA_PROJECT_CUR"))
{
bMacroEnabled = true;
}
#endif
return true;
}
bool COfficeFileFormatChecker::isDocFlatFormatFile (unsigned char* pBuffer, int dwBytes)
{
if (pBuffer == NULL) return false;
if ((pBuffer[0] == 0xEC && pBuffer[1] == 0xA5) ||
(pBuffer[0] == 0xDC && pBuffer[1] == 0xA5) ||
(pBuffer[0] == 0xDB && pBuffer[1] == 0xA5))
return true;
return false;
}
bool COfficeFileFormatChecker::isPptFormatFile (POLE::Storage * storage)
{
@ -236,8 +254,8 @@ bool COfficeFileFormatChecker::isOfficeFile(const std::wstring & fileName)
{
if ( isDocFormatFile(&storage) )
{
nFileType = AVS_OFFICESTUDIO_FILE_DOCUMENT_DOC;
return true;
//nFileType внутри
return true;
}
else if ( isXlsFormatFile(&storage) )
{
@ -265,7 +283,7 @@ bool COfficeFileFormatChecker::isOfficeFile(const std::wstring & fileName)
nFileType = AVS_OFFICESTUDIO_FILE_OTHER_MS_OFFCRYPTO;
return true;
}
}
}
COfficeUtils OfficeUtils(NULL);
if (OfficeUtils.IsArchive(fileName) == S_OK)
@ -335,6 +353,11 @@ bool COfficeFileFormatChecker::isOfficeFile(const std::wstring & fileName)
{
//nFileType
}
else if (isDocFlatFormatFile(buffer,sizeRead) )
{
nFileType = AVS_OFFICESTUDIO_FILE_DOCUMENT_DOC_FLAT; // without compaund container
}
//------------------------------------------------------------------------------------------------
file.CloseFile();

View File

@ -49,6 +49,8 @@
#define AVS_OFFICESTUDIO_FILE_DOCUMENT_DOTM AVS_OFFICESTUDIO_FILE_DOCUMENT + 0x000d
#define AVS_OFFICESTUDIO_FILE_DOCUMENT_ODT_FLAT AVS_OFFICESTUDIO_FILE_DOCUMENT + 0x000e
#define AVS_OFFICESTUDIO_FILE_DOCUMENT_OTT AVS_OFFICESTUDIO_FILE_DOCUMENT + 0x000f
#define AVS_OFFICESTUDIO_FILE_DOCUMENT_DOC_FLAT AVS_OFFICESTUDIO_FILE_DOCUMENT + 0x0010
#define AVS_OFFICESTUDIO_FILE_DOCUMENT_HTML_IN_CONTAINER AVS_OFFICESTUDIO_FILE_DOCUMENT + 0x0011
#define AVS_OFFICESTUDIO_FILE_PRESENTATION 0x0080
#define AVS_OFFICESTUDIO_FILE_PRESENTATION_PPTX AVS_OFFICESTUDIO_FILE_PRESENTATION + 0x0001
@ -60,7 +62,7 @@
#define AVS_OFFICESTUDIO_FILE_PRESENTATION_POTX AVS_OFFICESTUDIO_FILE_PRESENTATION + 0x0007
#define AVS_OFFICESTUDIO_FILE_PRESENTATION_POTM AVS_OFFICESTUDIO_FILE_PRESENTATION + 0x0008
#define AVS_OFFICESTUDIO_FILE_PRESENTATION_ODP_FLAT AVS_OFFICESTUDIO_FILE_PRESENTATION + 0x0009
#define AVS_OFFICESTUDIO_FILE_PRESENTATION_OTP AVS_OFFICESTUDIO_FILE_PRESENTATION + 0x0010
#define AVS_OFFICESTUDIO_FILE_PRESENTATION_OTP AVS_OFFICESTUDIO_FILE_PRESENTATION + 0x000a
#define AVS_OFFICESTUDIO_FILE_SPREADSHEET 0x0100
#define AVS_OFFICESTUDIO_FILE_SPREADSHEET_XLSX AVS_OFFICESTUDIO_FILE_SPREADSHEET + 0x0001

View File

@ -1,4 +1,4 @@
VERSION = 2.4.523.0
VERSION = 2.4.526.0
DEFINES += INTVER=$$VERSION
# CONFIGURATION

View File

@ -257,7 +257,7 @@ namespace agg
projects[3] = project(bounds.x2, bounds.y2);
double min = projects[0].x * projects[0].x + projects[0].y * projects[0].y;
if (projects[0].x * m_cos + projects[0].y * m_sin)
if ((projects[0].x * m_cos + projects[0].y * m_sin) < 0)
min = -min;
double max = min;
@ -307,6 +307,14 @@ namespace agg
{
if ( t < m_pPosSubColors[i] )
{
if (i == 1 && t < m_pPosSubColors[0])
{
// меньше меньшего
m_color_table[index] = m_pSubColors[0];
bFindColor = true;
break;
}
t = (t - m_pPosSubColors[i - 1]) / (m_pPosSubColors[i] - m_pPosSubColors[i - 1]);
m_color_table[index] = m_pSubColors[i - 1].gradient( m_pSubColors[i], t );
@ -437,6 +445,14 @@ namespace agg
for (int i = 1; i < m_nCountSubColors; ++i)
{
if (i == 1 && t < m_pPosSubColors[0])
{
// меньше меньшего
m_color_table[index] = m_pSubColors[0];
bFindColor = true;
break;
}
if ( t < m_pPosSubColors[i] )
{
t = (t - m_pPosSubColors[i - 1]) / (m_pPosSubColors[i] - m_pPosSubColors[i - 1]);

View File

@ -249,10 +249,10 @@
HEADER_SEARCH_PATHS = (
"$(inherited)",
/usr/include/,
"$(PROJECT_DIR)/../../../UnicodeConverter/icubuilds/mac/icu/**",
"$(PROJECT_DIR)/../../../UnicodeConverter/icubuilds/mac/icu/icu/common",
"$(PROJECT_DIR)/../../../UnicodeConverter/icubuilds/mac/icu/icu/i18n",
"$(PROJECT_DIR)/../../../UnicodeConverter/icubuilds/mac/icu/icu/io",
"$(PROJECT_DIR)/../../../UnicodeConverter/icubuilds-mac/icu/**",
"$(PROJECT_DIR)/../../../UnicodeConverter/icubuilds-mac/icu/icu/common",
"$(PROJECT_DIR)/../../../UnicodeConverter/icubuilds-mac/icu/icu/i18n",
"$(PROJECT_DIR)/../../../UnicodeConverter/icubuilds-mac/icu/icu/io",
);
OTHER_LDFLAGS = "-ObjC";
PRODUCT_NAME = "$(TARGET_NAME)";
@ -278,10 +278,10 @@
HEADER_SEARCH_PATHS = (
"$(inherited)",
/usr/include/,
"$(PROJECT_DIR)/../../../UnicodeConverter/icubuilds/mac/icu/**",
"$(PROJECT_DIR)/../../../UnicodeConverter/icubuilds/mac/icu/icu/common",
"$(PROJECT_DIR)/../../../UnicodeConverter/icubuilds/mac/icu/icu/i18n",
"$(PROJECT_DIR)/../../../UnicodeConverter/icubuilds/mac/icu/icu/io",
"$(PROJECT_DIR)/../../../UnicodeConverter/icubuilds-mac/icu/**",
"$(PROJECT_DIR)/../../../UnicodeConverter/icubuilds-mac/icu/icu/common",
"$(PROJECT_DIR)/../../../UnicodeConverter/icubuilds-mac/icu/icu/i18n",
"$(PROJECT_DIR)/../../../UnicodeConverter/icubuilds-mac/icu/icu/io",
);
OTHER_LDFLAGS = "-ObjC";
PRODUCT_NAME = "$(TARGET_NAME)";

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,729 @@
/*
**********************************************************************
* Copyright (C) 1996-2015, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*
* FILE NAME : UTYPES.H (formerly ptypes.h)
*
* Date Name Description
* 12/11/96 helena Creation.
* 02/27/97 aliu Added typedefs for UClassID, int8, int16, int32,
* uint8, uint16, and uint32.
* 04/01/97 aliu Added XP_CPLUSPLUS and modified to work under C as
* well as C++.
* Modified to use memcpy() for uprv_arrayCopy() fns.
* 04/14/97 aliu Added TPlatformUtilities.
* 05/07/97 aliu Added import/export specifiers (replacing the old
* broken EXT_CLASS). Added version number for our
* code. Cleaned up header.
* 6/20/97 helena Java class name change.
* 08/11/98 stephen UErrorCode changed from typedef to enum
* 08/12/98 erm Changed T_ANALYTIC_PACKAGE_VERSION to 3
* 08/14/98 stephen Added uprv_arrayCopy() for int8_t, int16_t, int32_t
* 12/09/98 jfitz Added BUFFER_OVERFLOW_ERROR (bug 1100066)
* 04/20/99 stephen Cleaned up & reworked for autoconf.
* Renamed to utypes.h.
* 05/05/99 stephen Changed to use <inttypes.h>
* 12/07/99 helena Moved copyright notice string from ucnv_bld.h here.
*******************************************************************************
*/
#ifndef UTYPES_H
#define UTYPES_H
#include "unicode/umachine.h"
#include "unicode/uversion.h"
#include "unicode/uconfig.h"
#include <float.h>
#if !U_NO_DEFAULT_INCLUDE_UTF_HEADERS
# include "unicode/utf.h"
#endif
/*!
* \file
* \brief Basic definitions for ICU, for both C and C++ APIs
*
* This file defines basic types, constants, and enumerations directly or
* indirectly by including other header files, especially utf.h for the
* basic character and string definitions and umachine.h for consistent
* integer and other types.
*/
/**
* \def U_SHOW_CPLUSPLUS_API
* @internal
*/
#ifdef __cplusplus
# ifndef U_SHOW_CPLUSPLUS_API
# define U_SHOW_CPLUSPLUS_API 1
# endif
#else
# undef U_SHOW_CPLUSPLUS_API
# define U_SHOW_CPLUSPLUS_API 0
#endif
/** @{ API visibility control */
/**
* \def U_HIDE_DRAFT_API
* Define this to 1 to request that draft API be "hidden"
* @internal
*/
/**
* \def U_HIDE_INTERNAL_API
* Define this to 1 to request that internal API be "hidden"
* @internal
*/
#if !U_DEFAULT_SHOW_DRAFT && !defined(U_SHOW_DRAFT_API)
#define U_HIDE_DRAFT_API 1
#endif
#if !U_DEFAULT_SHOW_DRAFT && !defined(U_SHOW_INTERNAL_API)
#define U_HIDE_INTERNAL_API 1
#endif
/** @} */
/*===========================================================================*/
/* ICUDATA naming scheme */
/*===========================================================================*/
/**
* \def U_ICUDATA_TYPE_LETTER
*
* This is a platform-dependent string containing one letter:
* - b for big-endian, ASCII-family platforms
* - l for little-endian, ASCII-family platforms
* - e for big-endian, EBCDIC-family platforms
* This letter is part of the common data file name.
* @stable ICU 2.0
*/
/**
* \def U_ICUDATA_TYPE_LITLETTER
* The non-string form of U_ICUDATA_TYPE_LETTER
* @stable ICU 2.0
*/
#if U_CHARSET_FAMILY
# if U_IS_BIG_ENDIAN
/* EBCDIC - should always be BE */
# define U_ICUDATA_TYPE_LETTER "e"
# define U_ICUDATA_TYPE_LITLETTER e
# else
# error "Don't know what to do with little endian EBCDIC!"
# define U_ICUDATA_TYPE_LETTER "x"
# define U_ICUDATA_TYPE_LITLETTER x
# endif
#else
# if U_IS_BIG_ENDIAN
/* Big-endian ASCII */
# define U_ICUDATA_TYPE_LETTER "b"
# define U_ICUDATA_TYPE_LITLETTER b
# else
/* Little-endian ASCII */
# define U_ICUDATA_TYPE_LETTER "l"
# define U_ICUDATA_TYPE_LITLETTER l
# endif
#endif
/**
* A single string literal containing the icudata stub name. i.e. 'icudt18e' for
* ICU 1.8.x on EBCDIC, etc..
* @stable ICU 2.0
*/
#define U_ICUDATA_NAME "icudt" U_ICU_VERSION_SHORT U_ICUDATA_TYPE_LETTER
#ifndef U_HIDE_INTERNAL_API
#define U_USRDATA_NAME "usrdt" U_ICU_VERSION_SHORT U_ICUDATA_TYPE_LETTER /**< @internal */
#define U_USE_USRDATA 1 /**< @internal */
#endif /* U_HIDE_INTERNAL_API */
/**
* U_ICU_ENTRY_POINT is the name of the DLL entry point to the ICU data library.
* Defined as a literal, not a string.
* Tricky Preprocessor use - ## operator replaces macro paramters with the literal string
* from the corresponding macro invocation, _before_ other macro substitutions.
* Need a nested \#defines to get the actual version numbers rather than
* the literal text U_ICU_VERSION_MAJOR_NUM into the name.
* The net result will be something of the form
* \#define U_ICU_ENTRY_POINT icudt19_dat
* @stable ICU 2.4
*/
#define U_ICUDATA_ENTRY_POINT U_DEF2_ICUDATA_ENTRY_POINT(U_ICU_VERSION_MAJOR_NUM,U_LIB_SUFFIX_C_NAME)
#ifndef U_HIDE_INTERNAL_API
/**
* Do not use. Note that it's OK for the 2nd argument to be undefined (literal).
* @internal
*/
#define U_DEF2_ICUDATA_ENTRY_POINT(major,suff) U_DEF_ICUDATA_ENTRY_POINT(major,suff)
/**
* Do not use.
* @internal
*/
#ifndef U_DEF_ICUDATA_ENTRY_POINT
/* affected by symbol renaming. See platform.h */
#ifndef U_LIB_SUFFIX_C_NAME
#define U_DEF_ICUDATA_ENTRY_POINT(major, suff) icudt##major##_dat
#else
#define U_DEF_ICUDATA_ENTRY_POINT(major, suff) icudt##suff ## major##_dat
#endif
#endif
#endif /* U_HIDE_INTERNAL_API */
/**
* \def NULL
* Define NULL if necessary, to 0 for C++ and to ((void *)0) for C.
* @stable ICU 2.0
*/
#ifndef NULL
#ifdef __cplusplus
#define NULL 0
#else
#define NULL ((void *)0)
#endif
#endif
/*===========================================================================*/
/* Calendar/TimeZone data types */
/*===========================================================================*/
/**
* Date and Time data type.
* This is a primitive data type that holds the date and time
* as the number of milliseconds since 1970-jan-01, 00:00 UTC.
* UTC leap seconds are ignored.
* @stable ICU 2.0
*/
typedef double UDate;
/** The number of milliseconds per second @stable ICU 2.0 */
#define U_MILLIS_PER_SECOND (1000)
/** The number of milliseconds per minute @stable ICU 2.0 */
#define U_MILLIS_PER_MINUTE (60000)
/** The number of milliseconds per hour @stable ICU 2.0 */
#define U_MILLIS_PER_HOUR (3600000)
/** The number of milliseconds per day @stable ICU 2.0 */
#define U_MILLIS_PER_DAY (86400000)
/**
* Maximum UDate value
* @stable ICU 4.8
*/
#define U_DATE_MAX DBL_MAX
/**
* Minimum UDate value
* @stable ICU 4.8
*/
#define U_DATE_MIN -U_DATE_MAX
/*===========================================================================*/
/* Shared library/DLL import-export API control */
/*===========================================================================*/
/*
* Control of symbol import/export.
* ICU is separated into three libraries.
*/
/**
* \def U_COMBINED_IMPLEMENTATION
* Set to export library symbols from inside the ICU library
* when all of ICU is in a single library.
* This can be set as a compiler option while building ICU, and it
* needs to be the first one tested to override U_COMMON_API, U_I18N_API, etc.
* @stable ICU 2.0
*/
/**
* \def U_DATA_API
* Set to export library symbols from inside the stubdata library,
* and to import them from outside.
* @stable ICU 3.0
*/
/**
* \def U_COMMON_API
* Set to export library symbols from inside the common library,
* and to import them from outside.
* @stable ICU 2.0
*/
/**
* \def U_I18N_API
* Set to export library symbols from inside the i18n library,
* and to import them from outside.
* @stable ICU 2.0
*/
/**
* \def U_LAYOUT_API
* Set to export library symbols from inside the layout engine library,
* and to import them from outside.
* @stable ICU 2.0
*/
/**
* \def U_LAYOUTEX_API
* Set to export library symbols from inside the layout extensions library,
* and to import them from outside.
* @stable ICU 2.6
*/
/**
* \def U_IO_API
* Set to export library symbols from inside the ustdio library,
* and to import them from outside.
* @stable ICU 2.0
*/
/**
* \def U_TOOLUTIL_API
* Set to export library symbols from inside the toolutil library,
* and to import them from outside.
* @stable ICU 3.4
*/
#if defined(U_COMBINED_IMPLEMENTATION)
#define U_DATA_API U_EXPORT
#define U_COMMON_API U_EXPORT
#define U_I18N_API U_EXPORT
#define U_LAYOUT_API U_EXPORT
#define U_LAYOUTEX_API U_EXPORT
#define U_IO_API U_EXPORT
#define U_TOOLUTIL_API U_EXPORT
#elif defined(U_STATIC_IMPLEMENTATION)
#define U_DATA_API
#define U_COMMON_API
#define U_I18N_API
#define U_LAYOUT_API
#define U_LAYOUTEX_API
#define U_IO_API
#define U_TOOLUTIL_API
#elif defined(U_COMMON_IMPLEMENTATION)
#define U_DATA_API U_IMPORT
#define U_COMMON_API U_EXPORT
#define U_I18N_API U_IMPORT
#define U_LAYOUT_API U_IMPORT
#define U_LAYOUTEX_API U_IMPORT
#define U_IO_API U_IMPORT
#define U_TOOLUTIL_API U_IMPORT
#elif defined(U_I18N_IMPLEMENTATION)
#define U_DATA_API U_IMPORT
#define U_COMMON_API U_IMPORT
#define U_I18N_API U_EXPORT
#define U_LAYOUT_API U_IMPORT
#define U_LAYOUTEX_API U_IMPORT
#define U_IO_API U_IMPORT
#define U_TOOLUTIL_API U_IMPORT
#elif defined(U_LAYOUT_IMPLEMENTATION)
#define U_DATA_API U_IMPORT
#define U_COMMON_API U_IMPORT
#define U_I18N_API U_IMPORT
#define U_LAYOUT_API U_EXPORT
#define U_LAYOUTEX_API U_IMPORT
#define U_IO_API U_IMPORT
#define U_TOOLUTIL_API U_IMPORT
#elif defined(U_LAYOUTEX_IMPLEMENTATION)
#define U_DATA_API U_IMPORT
#define U_COMMON_API U_IMPORT
#define U_I18N_API U_IMPORT
#define U_LAYOUT_API U_IMPORT
#define U_LAYOUTEX_API U_EXPORT
#define U_IO_API U_IMPORT
#define U_TOOLUTIL_API U_IMPORT
#elif defined(U_IO_IMPLEMENTATION)
#define U_DATA_API U_IMPORT
#define U_COMMON_API U_IMPORT
#define U_I18N_API U_IMPORT
#define U_LAYOUT_API U_IMPORT
#define U_LAYOUTEX_API U_IMPORT
#define U_IO_API U_EXPORT
#define U_TOOLUTIL_API U_IMPORT
#elif defined(U_TOOLUTIL_IMPLEMENTATION)
#define U_DATA_API U_IMPORT
#define U_COMMON_API U_IMPORT
#define U_I18N_API U_IMPORT
#define U_LAYOUT_API U_IMPORT
#define U_LAYOUTEX_API U_IMPORT
#define U_IO_API U_IMPORT
#define U_TOOLUTIL_API U_EXPORT
#else
#define U_DATA_API U_IMPORT
#define U_COMMON_API U_IMPORT
#define U_I18N_API U_IMPORT
#define U_LAYOUT_API U_IMPORT
#define U_LAYOUTEX_API U_IMPORT
#define U_IO_API U_IMPORT
#define U_TOOLUTIL_API U_IMPORT
#endif
/**
* \def U_STANDARD_CPP_NAMESPACE
* Control of C++ Namespace
* @stable ICU 2.0
*/
#ifdef __cplusplus
#define U_STANDARD_CPP_NAMESPACE ::
#else
#define U_STANDARD_CPP_NAMESPACE
#endif
/*===========================================================================*/
/* Global delete operator */
/*===========================================================================*/
/*
* The ICU4C library must not use the global new and delete operators.
* These operators here are defined to enable testing for this.
* See Jitterbug 2581 for details of why this is necessary.
*
* Verification that ICU4C's memory usage is correct, i.e.,
* that global new/delete are not used:
*
* a) Check for imports of global new/delete (see uobject.cpp for details)
* b) Verify that new is never imported.
* c) Verify that delete is only imported from object code for interface/mixin classes.
* d) Add global delete and delete[] only for the ICU4C library itself
* and define them in a way that crashes or otherwise easily shows a problem.
*
* The following implements d).
* The operator implementations crash; this is intentional and used for library debugging.
*
* Note: This is currently only done on Windows because
* some Linux/Unix compilers have problems with defining global new/delete.
* On Windows, it is _MSC_VER>=1200 for MSVC 6.0 and higher.
*/
#if defined(__cplusplus) && U_DEBUG && U_OVERRIDE_CXX_ALLOCATION && (_MSC_VER>=1200) && !defined(U_STATIC_IMPLEMENTATION) && (defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION) || defined(U_LAYOUT_IMPLEMENTATION) || defined(U_LAYOUTEX_IMPLEMENTATION))
#ifndef U_HIDE_INTERNAL_API
/**
* Global operator new, defined only inside ICU4C, must not be used.
* Crashes intentionally.
* @internal
*/
inline void *
operator new(size_t /*size*/) {
char *q=NULL;
*q=5; /* break it */
return q;
}
#ifdef _Ret_bytecap_
/* This is only needed to suppress a Visual C++ 2008 warning for operator new[]. */
_Ret_bytecap_(_Size)
#endif
/**
* Global operator new[], defined only inside ICU4C, must not be used.
* Crashes intentionally.
* @internal
*/
inline void *
operator new[](size_t /*size*/) {
char *q=NULL;
*q=5; /* break it */
return q;
}
/**
* Global operator delete, defined only inside ICU4C, must not be used.
* Crashes intentionally.
* @internal
*/
inline void
operator delete(void * /*p*/) {
char *q=NULL;
*q=5; /* break it */
}
/**
* Global operator delete[], defined only inside ICU4C, must not be used.
* Crashes intentionally.
* @internal
*/
inline void
operator delete[](void * /*p*/) {
char *q=NULL;
*q=5; /* break it */
}
#endif /* U_HIDE_INTERNAL_API */
#endif
/*===========================================================================*/
/* UErrorCode */
/*===========================================================================*/
/**
* Error code to replace exception handling, so that the code is compatible with all C++ compilers,
* and to use the same mechanism for C and C++.
*
* \par
* ICU functions that take a reference (C++) or a pointer (C) to a UErrorCode
* first test if(U_FAILURE(errorCode)) { return immediately; }
* so that in a chain of such functions the first one that sets an error code
* causes the following ones to not perform any operations.
*
* \par
* Error codes should be tested using U_FAILURE() and U_SUCCESS().
* @stable ICU 2.0
*/
typedef enum UErrorCode {
/* The ordering of U_ERROR_INFO_START Vs U_USING_FALLBACK_WARNING looks weird
* and is that way because VC++ debugger displays first encountered constant,
* which is not the what the code is used for
*/
U_USING_FALLBACK_WARNING = -128, /**< A resource bundle lookup returned a fallback result (not an error) */
U_ERROR_WARNING_START = -128, /**< Start of information results (semantically successful) */
U_USING_DEFAULT_WARNING = -127, /**< A resource bundle lookup returned a result from the root locale (not an error) */
U_SAFECLONE_ALLOCATED_WARNING = -126, /**< A SafeClone operation required allocating memory (informational only) */
U_STATE_OLD_WARNING = -125, /**< ICU has to use compatibility layer to construct the service. Expect performance/memory usage degradation. Consider upgrading */
U_STRING_NOT_TERMINATED_WARNING = -124,/**< An output string could not be NUL-terminated because output length==destCapacity. */
U_SORT_KEY_TOO_SHORT_WARNING = -123, /**< Number of levels requested in getBound is higher than the number of levels in the sort key */
U_AMBIGUOUS_ALIAS_WARNING = -122, /**< This converter alias can go to different converter implementations */
U_DIFFERENT_UCA_VERSION = -121, /**< ucol_open encountered a mismatch between UCA version and collator image version, so the collator was constructed from rules. No impact to further function */
U_PLUGIN_CHANGED_LEVEL_WARNING = -120, /**< A plugin caused a level change. May not be an error, but later plugins may not load. */
U_ERROR_WARNING_LIMIT, /**< This must always be the last warning value to indicate the limit for UErrorCode warnings (last warning code +1) */
U_ZERO_ERROR = 0, /**< No error, no warning. */
U_ILLEGAL_ARGUMENT_ERROR = 1, /**< Start of codes indicating failure */
U_MISSING_RESOURCE_ERROR = 2, /**< The requested resource cannot be found */
U_INVALID_FORMAT_ERROR = 3, /**< Data format is not what is expected */
U_FILE_ACCESS_ERROR = 4, /**< The requested file cannot be found */
U_INTERNAL_PROGRAM_ERROR = 5, /**< Indicates a bug in the library code */
U_MESSAGE_PARSE_ERROR = 6, /**< Unable to parse a message (message format) */
U_MEMORY_ALLOCATION_ERROR = 7, /**< Memory allocation error */
U_INDEX_OUTOFBOUNDS_ERROR = 8, /**< Trying to access the index that is out of bounds */
U_PARSE_ERROR = 9, /**< Equivalent to Java ParseException */
U_INVALID_CHAR_FOUND = 10, /**< Character conversion: Unmappable input sequence. In other APIs: Invalid character. */
U_TRUNCATED_CHAR_FOUND = 11, /**< Character conversion: Incomplete input sequence. */
U_ILLEGAL_CHAR_FOUND = 12, /**< Character conversion: Illegal input sequence/combination of input units. */
U_INVALID_TABLE_FORMAT = 13, /**< Conversion table file found, but corrupted */
U_INVALID_TABLE_FILE = 14, /**< Conversion table file not found */
U_BUFFER_OVERFLOW_ERROR = 15, /**< A result would not fit in the supplied buffer */
U_UNSUPPORTED_ERROR = 16, /**< Requested operation not supported in current context */
U_RESOURCE_TYPE_MISMATCH = 17, /**< an operation is requested over a resource that does not support it */
U_ILLEGAL_ESCAPE_SEQUENCE = 18, /**< ISO-2022 illlegal escape sequence */
U_UNSUPPORTED_ESCAPE_SEQUENCE = 19, /**< ISO-2022 unsupported escape sequence */
U_NO_SPACE_AVAILABLE = 20, /**< No space available for in-buffer expansion for Arabic shaping */
U_CE_NOT_FOUND_ERROR = 21, /**< Currently used only while setting variable top, but can be used generally */
U_PRIMARY_TOO_LONG_ERROR = 22, /**< User tried to set variable top to a primary that is longer than two bytes */
U_STATE_TOO_OLD_ERROR = 23, /**< ICU cannot construct a service from this state, as it is no longer supported */
U_TOO_MANY_ALIASES_ERROR = 24, /**< There are too many aliases in the path to the requested resource.
It is very possible that a circular alias definition has occured */
U_ENUM_OUT_OF_SYNC_ERROR = 25, /**< UEnumeration out of sync with underlying collection */
U_INVARIANT_CONVERSION_ERROR = 26, /**< Unable to convert a UChar* string to char* with the invariant converter. */
U_INVALID_STATE_ERROR = 27, /**< Requested operation can not be completed with ICU in its current state */
U_COLLATOR_VERSION_MISMATCH = 28, /**< Collator version is not compatible with the base version */
U_USELESS_COLLATOR_ERROR = 29, /**< Collator is options only and no base is specified */
U_NO_WRITE_PERMISSION = 30, /**< Attempt to modify read-only or constant data. */
U_STANDARD_ERROR_LIMIT, /**< This must always be the last value to indicate the limit for standard errors */
/*
* the error code range 0x10000 0x10100 are reserved for Transliterator
*/
U_BAD_VARIABLE_DEFINITION=0x10000,/**< Missing '$' or duplicate variable name */
U_PARSE_ERROR_START = 0x10000, /**< Start of Transliterator errors */
U_MALFORMED_RULE, /**< Elements of a rule are misplaced */
U_MALFORMED_SET, /**< A UnicodeSet pattern is invalid*/
U_MALFORMED_SYMBOL_REFERENCE, /**< UNUSED as of ICU 2.4 */
U_MALFORMED_UNICODE_ESCAPE, /**< A Unicode escape pattern is invalid*/
U_MALFORMED_VARIABLE_DEFINITION, /**< A variable definition is invalid */
U_MALFORMED_VARIABLE_REFERENCE, /**< A variable reference is invalid */
U_MISMATCHED_SEGMENT_DELIMITERS, /**< UNUSED as of ICU 2.4 */
U_MISPLACED_ANCHOR_START, /**< A start anchor appears at an illegal position */
U_MISPLACED_CURSOR_OFFSET, /**< A cursor offset occurs at an illegal position */
U_MISPLACED_QUANTIFIER, /**< A quantifier appears after a segment close delimiter */
U_MISSING_OPERATOR, /**< A rule contains no operator */
U_MISSING_SEGMENT_CLOSE, /**< UNUSED as of ICU 2.4 */
U_MULTIPLE_ANTE_CONTEXTS, /**< More than one ante context */
U_MULTIPLE_CURSORS, /**< More than one cursor */
U_MULTIPLE_POST_CONTEXTS, /**< More than one post context */
U_TRAILING_BACKSLASH, /**< A dangling backslash */
U_UNDEFINED_SEGMENT_REFERENCE, /**< A segment reference does not correspond to a defined segment */
U_UNDEFINED_VARIABLE, /**< A variable reference does not correspond to a defined variable */
U_UNQUOTED_SPECIAL, /**< A special character was not quoted or escaped */
U_UNTERMINATED_QUOTE, /**< A closing single quote is missing */
U_RULE_MASK_ERROR, /**< A rule is hidden by an earlier more general rule */
U_MISPLACED_COMPOUND_FILTER, /**< A compound filter is in an invalid location */
U_MULTIPLE_COMPOUND_FILTERS, /**< More than one compound filter */
U_INVALID_RBT_SYNTAX, /**< A "::id" rule was passed to the RuleBasedTransliterator parser */
U_INVALID_PROPERTY_PATTERN, /**< UNUSED as of ICU 2.4 */
U_MALFORMED_PRAGMA, /**< A 'use' pragma is invlalid */
U_UNCLOSED_SEGMENT, /**< A closing ')' is missing */
U_ILLEGAL_CHAR_IN_SEGMENT, /**< UNUSED as of ICU 2.4 */
U_VARIABLE_RANGE_EXHAUSTED, /**< Too many stand-ins generated for the given variable range */
U_VARIABLE_RANGE_OVERLAP, /**< The variable range overlaps characters used in rules */
U_ILLEGAL_CHARACTER, /**< A special character is outside its allowed context */
U_INTERNAL_TRANSLITERATOR_ERROR, /**< Internal transliterator system error */
U_INVALID_ID, /**< A "::id" rule specifies an unknown transliterator */
U_INVALID_FUNCTION, /**< A "&fn()" rule specifies an unknown transliterator */
U_PARSE_ERROR_LIMIT, /**< The limit for Transliterator errors */
/*
* the error code range 0x10100 0x10200 are reserved for formatting API parsing error
*/
U_UNEXPECTED_TOKEN=0x10100, /**< Syntax error in format pattern */
U_FMT_PARSE_ERROR_START=0x10100, /**< Start of format library errors */
U_MULTIPLE_DECIMAL_SEPARATORS, /**< More than one decimal separator in number pattern */
U_MULTIPLE_DECIMAL_SEPERATORS = U_MULTIPLE_DECIMAL_SEPARATORS, /**< Typo: kept for backward compatibility. Use U_MULTIPLE_DECIMAL_SEPARATORS */
U_MULTIPLE_EXPONENTIAL_SYMBOLS, /**< More than one exponent symbol in number pattern */
U_MALFORMED_EXPONENTIAL_PATTERN, /**< Grouping symbol in exponent pattern */
U_MULTIPLE_PERCENT_SYMBOLS, /**< More than one percent symbol in number pattern */
U_MULTIPLE_PERMILL_SYMBOLS, /**< More than one permill symbol in number pattern */
U_MULTIPLE_PAD_SPECIFIERS, /**< More than one pad symbol in number pattern */
U_PATTERN_SYNTAX_ERROR, /**< Syntax error in format pattern */
U_ILLEGAL_PAD_POSITION, /**< Pad symbol misplaced in number pattern */
U_UNMATCHED_BRACES, /**< Braces do not match in message pattern */
U_UNSUPPORTED_PROPERTY, /**< UNUSED as of ICU 2.4 */
U_UNSUPPORTED_ATTRIBUTE, /**< UNUSED as of ICU 2.4 */
U_ARGUMENT_TYPE_MISMATCH, /**< Argument name and argument index mismatch in MessageFormat functions */
U_DUPLICATE_KEYWORD, /**< Duplicate keyword in PluralFormat */
U_UNDEFINED_KEYWORD, /**< Undefined Plural keyword */
U_DEFAULT_KEYWORD_MISSING, /**< Missing DEFAULT rule in plural rules */
U_DECIMAL_NUMBER_SYNTAX_ERROR, /**< Decimal number syntax error */
U_FORMAT_INEXACT_ERROR, /**< Cannot format a number exactly and rounding mode is ROUND_UNNECESSARY @stable ICU 4.8 */
U_FMT_PARSE_ERROR_LIMIT, /**< The limit for format library errors */
/*
* the error code range 0x10200 0x102ff are reserved for Break Iterator related error
*/
U_BRK_INTERNAL_ERROR=0x10200, /**< An internal error (bug) was detected. */
U_BRK_ERROR_START=0x10200, /**< Start of codes indicating Break Iterator failures */
U_BRK_HEX_DIGITS_EXPECTED, /**< Hex digits expected as part of a escaped char in a rule. */
U_BRK_SEMICOLON_EXPECTED, /**< Missing ';' at the end of a RBBI rule. */
U_BRK_RULE_SYNTAX, /**< Syntax error in RBBI rule. */
U_BRK_UNCLOSED_SET, /**< UnicodeSet witing an RBBI rule missing a closing ']'. */
U_BRK_ASSIGN_ERROR, /**< Syntax error in RBBI rule assignment statement. */
U_BRK_VARIABLE_REDFINITION, /**< RBBI rule $Variable redefined. */
U_BRK_MISMATCHED_PAREN, /**< Mis-matched parentheses in an RBBI rule. */
U_BRK_NEW_LINE_IN_QUOTED_STRING, /**< Missing closing quote in an RBBI rule. */
U_BRK_UNDEFINED_VARIABLE, /**< Use of an undefined $Variable in an RBBI rule. */
U_BRK_INIT_ERROR, /**< Initialization failure. Probable missing ICU Data. */
U_BRK_RULE_EMPTY_SET, /**< Rule contains an empty Unicode Set. */
U_BRK_UNRECOGNIZED_OPTION, /**< !!option in RBBI rules not recognized. */
U_BRK_MALFORMED_RULE_TAG, /**< The {nnn} tag on a rule is mal formed */
U_BRK_ERROR_LIMIT, /**< This must always be the last value to indicate the limit for Break Iterator failures */
/*
* The error codes in the range 0x10300-0x103ff are reserved for regular expression related errrs
*/
U_REGEX_INTERNAL_ERROR=0x10300, /**< An internal error (bug) was detected. */
U_REGEX_ERROR_START=0x10300, /**< Start of codes indicating Regexp failures */
U_REGEX_RULE_SYNTAX, /**< Syntax error in regexp pattern. */
U_REGEX_INVALID_STATE, /**< RegexMatcher in invalid state for requested operation */
U_REGEX_BAD_ESCAPE_SEQUENCE, /**< Unrecognized backslash escape sequence in pattern */
U_REGEX_PROPERTY_SYNTAX, /**< Incorrect Unicode property */
U_REGEX_UNIMPLEMENTED, /**< Use of regexp feature that is not yet implemented. */
U_REGEX_MISMATCHED_PAREN, /**< Incorrectly nested parentheses in regexp pattern. */
U_REGEX_NUMBER_TOO_BIG, /**< Decimal number is too large. */
U_REGEX_BAD_INTERVAL, /**< Error in {min,max} interval */
U_REGEX_MAX_LT_MIN, /**< In {min,max}, max is less than min. */
U_REGEX_INVALID_BACK_REF, /**< Back-reference to a non-existent capture group. */
U_REGEX_INVALID_FLAG, /**< Invalid value for match mode flags. */
U_REGEX_LOOK_BEHIND_LIMIT, /**< Look-Behind pattern matches must have a bounded maximum length. */
U_REGEX_SET_CONTAINS_STRING, /**< Regexps cannot have UnicodeSets containing strings.*/
#ifndef U_HIDE_DEPRECATED_API
U_REGEX_OCTAL_TOO_BIG, /**< Octal character constants must be <= 0377. @deprecated ICU 54. This error cannot occur. */
#endif /* U_HIDE_DEPRECATED_API */
U_REGEX_MISSING_CLOSE_BRACKET=U_REGEX_SET_CONTAINS_STRING+2, /**< Missing closing bracket on a bracket expression. */
U_REGEX_INVALID_RANGE, /**< In a character range [x-y], x is greater than y. */
U_REGEX_STACK_OVERFLOW, /**< Regular expression backtrack stack overflow. */
U_REGEX_TIME_OUT, /**< Maximum allowed match time exceeded */
U_REGEX_STOPPED_BY_CALLER, /**< Matching operation aborted by user callback fn. */
#ifndef U_HIDE_DRAFT_API
U_REGEX_PATTERN_TOO_BIG, /**< Pattern exceeds limits on size or complexity. @draft ICU 55 */
U_REGEX_INVALID_CAPTURE_GROUP_NAME, /**< Invalid capture group name. @draft ICU 55 */
#endif /* U_HIDE_DRAFT_API */
U_REGEX_ERROR_LIMIT=U_REGEX_STOPPED_BY_CALLER+3, /**< This must always be the last value to indicate the limit for regexp errors */
/*
* The error code in the range 0x10400-0x104ff are reserved for IDNA related error codes
*/
U_IDNA_PROHIBITED_ERROR=0x10400,
U_IDNA_ERROR_START=0x10400,
U_IDNA_UNASSIGNED_ERROR,
U_IDNA_CHECK_BIDI_ERROR,
U_IDNA_STD3_ASCII_RULES_ERROR,
U_IDNA_ACE_PREFIX_ERROR,
U_IDNA_VERIFICATION_ERROR,
U_IDNA_LABEL_TOO_LONG_ERROR,
U_IDNA_ZERO_LENGTH_LABEL_ERROR,
U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR,
U_IDNA_ERROR_LIMIT,
/*
* Aliases for StringPrep
*/
U_STRINGPREP_PROHIBITED_ERROR = U_IDNA_PROHIBITED_ERROR,
U_STRINGPREP_UNASSIGNED_ERROR = U_IDNA_UNASSIGNED_ERROR,
U_STRINGPREP_CHECK_BIDI_ERROR = U_IDNA_CHECK_BIDI_ERROR,
/*
* The error code in the range 0x10500-0x105ff are reserved for Plugin related error codes
*/
U_PLUGIN_ERROR_START=0x10500, /**< Start of codes indicating plugin failures */
U_PLUGIN_TOO_HIGH=0x10500, /**< The plugin's level is too high to be loaded right now. */
U_PLUGIN_DIDNT_SET_LEVEL, /**< The plugin didn't call uplug_setPlugLevel in response to a QUERY */
U_PLUGIN_ERROR_LIMIT, /**< This must always be the last value to indicate the limit for plugin errors */
U_ERROR_LIMIT=U_PLUGIN_ERROR_LIMIT /**< This must always be the last value to indicate the limit for UErrorCode (last error code +1) */
} UErrorCode;
/* Use the following to determine if an UErrorCode represents */
/* operational success or failure. */
#ifdef __cplusplus
/**
* Does the error code indicate success?
* @stable ICU 2.0
*/
static
inline UBool U_SUCCESS(UErrorCode code) { return (UBool)(code<=U_ZERO_ERROR); }
/**
* Does the error code indicate a failure?
* @stable ICU 2.0
*/
static
inline UBool U_FAILURE(UErrorCode code) { return (UBool)(code>U_ZERO_ERROR); }
#else
/**
* Does the error code indicate success?
* @stable ICU 2.0
*/
# define U_SUCCESS(x) ((x)<=U_ZERO_ERROR)
/**
* Does the error code indicate a failure?
* @stable ICU 2.0
*/
# define U_FAILURE(x) ((x)>U_ZERO_ERROR)
#endif
/**
* Return a string for a UErrorCode value.
* The string will be the same as the name of the error code constant
* in the UErrorCode enum above.
* @stable ICU 2.0
*/
U_STABLE const char * U_EXPORT2
u_errorName(UErrorCode code);
#endif /* _UTYPES */

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<Workspace
version = "1.0">
<FileRef
location = "self:icu.xcodeproj">
</FileRef>
</Workspace>

View File

@ -0,0 +1,72 @@
/*
*******************************************************************************
* Copyright (C) 2011-2012, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* file name: appendable.cpp
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2010dec07
* created by: Markus W. Scherer
*/
#include "unicode/utypes.h"
#include "unicode/appendable.h"
#include "unicode/utf16.h"
U_NAMESPACE_BEGIN
Appendable::~Appendable() {}
UBool
Appendable::appendCodePoint(UChar32 c) {
if(c<=0xffff) {
return appendCodeUnit((UChar)c);
} else {
return appendCodeUnit(U16_LEAD(c)) && appendCodeUnit(U16_TRAIL(c));
}
}
UBool
Appendable::appendString(const UChar *s, int32_t length) {
if(length<0) {
UChar c;
while((c=*s++)!=0) {
if(!appendCodeUnit(c)) {
return FALSE;
}
}
} else if(length>0) {
const UChar *limit=s+length;
do {
if(!appendCodeUnit(*s++)) {
return FALSE;
}
} while(s<limit);
}
return TRUE;
}
UBool
Appendable::reserveAppendCapacity(int32_t /*appendCapacity*/) {
return TRUE;
}
UChar *
Appendable::getAppendBuffer(int32_t minCapacity,
int32_t /*desiredCapacityHint*/,
UChar *scratch, int32_t scratchCapacity,
int32_t *resultCapacity) {
if(minCapacity<1 || scratchCapacity<minCapacity) {
*resultCapacity=0;
return NULL;
}
*resultCapacity=scratchCapacity;
return scratch;
}
// UnicodeStringAppendable is implemented in unistr.cpp.
U_NAMESPACE_END

View File

@ -0,0 +1,725 @@
/*
******************************************************************************
*
* Copyright (C) 2007-2012, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
* file name: bmpset.cpp
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2007jan29
* created by: Markus W. Scherer
*/
#include "unicode/utypes.h"
#include "unicode/uniset.h"
#include "unicode/utf8.h"
#include "unicode/utf16.h"
#include "cmemory.h"
#include "bmpset.h"
#include "uassert.h"
U_NAMESPACE_BEGIN
BMPSet::BMPSet(const int32_t *parentList, int32_t parentListLength) :
list(parentList), listLength(parentListLength) {
uprv_memset(asciiBytes, 0, sizeof(asciiBytes));
uprv_memset(table7FF, 0, sizeof(table7FF));
uprv_memset(bmpBlockBits, 0, sizeof(bmpBlockBits));
/*
* Set the list indexes for binary searches for
* U+0800, U+1000, U+2000, .., U+F000, U+10000.
* U+0800 is the first 3-byte-UTF-8 code point. Lower code points are
* looked up in the bit tables.
* The last pair of indexes is for finding supplementary code points.
*/
list4kStarts[0]=findCodePoint(0x800, 0, listLength-1);
int32_t i;
for(i=1; i<=0x10; ++i) {
list4kStarts[i]=findCodePoint(i<<12, list4kStarts[i-1], listLength-1);
}
list4kStarts[0x11]=listLength-1;
initBits();
overrideIllegal();
}
BMPSet::BMPSet(const BMPSet &otherBMPSet, const int32_t *newParentList, int32_t newParentListLength) :
list(newParentList), listLength(newParentListLength) {
uprv_memcpy(asciiBytes, otherBMPSet.asciiBytes, sizeof(asciiBytes));
uprv_memcpy(table7FF, otherBMPSet.table7FF, sizeof(table7FF));
uprv_memcpy(bmpBlockBits, otherBMPSet.bmpBlockBits, sizeof(bmpBlockBits));
uprv_memcpy(list4kStarts, otherBMPSet.list4kStarts, sizeof(list4kStarts));
}
BMPSet::~BMPSet() {
}
/*
* Set bits in a bit rectangle in "vertical" bit organization.
* start<limit<=0x800
*/
static void set32x64Bits(uint32_t table[64], int32_t start, int32_t limit) {
U_ASSERT(start<limit);
U_ASSERT(limit<=0x800);
int32_t lead=start>>6; // Named for UTF-8 2-byte lead byte with upper 5 bits.
int32_t trail=start&0x3f; // Named for UTF-8 2-byte trail byte with lower 6 bits.
// Set one bit indicating an all-one block.
uint32_t bits=(uint32_t)1<<lead;
if((start+1)==limit) { // Single-character shortcut.
table[trail]|=bits;
return;
}
int32_t limitLead=limit>>6;
int32_t limitTrail=limit&0x3f;
if(lead==limitLead) {
// Partial vertical bit column.
while(trail<limitTrail) {
table[trail++]|=bits;
}
} else {
// Partial vertical bit column,
// followed by a bit rectangle,
// followed by another partial vertical bit column.
if(trail>0) {
do {
table[trail++]|=bits;
} while(trail<64);
++lead;
}
if(lead<limitLead) {
bits=~((1<<lead)-1);
if(limitLead<0x20) {
bits&=(1<<limitLead)-1;
}
for(trail=0; trail<64; ++trail) {
table[trail]|=bits;
}
}
// limit<=0x800. If limit==0x800 then limitLead=32 and limitTrail=0.
// In that case, bits=1<<limitLead is undefined but the bits value
// is not used because trail<limitTrail is already false.
bits=(uint32_t)1<<((limitLead == 0x20) ? (limitLead - 1) : limitLead);
for(trail=0; trail<limitTrail; ++trail) {
table[trail]|=bits;
}
}
}
void BMPSet::initBits() {
UChar32 start, limit;
int32_t listIndex=0;
// Set asciiBytes[].
do {
start=list[listIndex++];
if(listIndex<listLength) {
limit=list[listIndex++];
} else {
limit=0x110000;
}
if(start>=0x80) {
break;
}
do {
asciiBytes[start++]=1;
} while(start<limit && start<0x80);
} while(limit<=0x80);
// Set table7FF[].
while(start<0x800) {
set32x64Bits(table7FF, start, limit<=0x800 ? limit : 0x800);
if(limit>0x800) {
start=0x800;
break;
}
start=list[listIndex++];
if(listIndex<listLength) {
limit=list[listIndex++];
} else {
limit=0x110000;
}
}
// Set bmpBlockBits[].
int32_t minStart=0x800;
while(start<0x10000) {
if(limit>0x10000) {
limit=0x10000;
}
if(start<minStart) {
start=minStart;
}
if(start<limit) { // Else: Another range entirely in a known mixed-value block.
if(start&0x3f) {
// Mixed-value block of 64 code points.
start>>=6;
bmpBlockBits[start&0x3f]|=0x10001<<(start>>6);
start=(start+1)<<6; // Round up to the next block boundary.
minStart=start; // Ignore further ranges in this block.
}
if(start<limit) {
if(start<(limit&~0x3f)) {
// Multiple all-ones blocks of 64 code points each.
set32x64Bits(bmpBlockBits, start>>6, limit>>6);
}
if(limit&0x3f) {
// Mixed-value block of 64 code points.
limit>>=6;
bmpBlockBits[limit&0x3f]|=0x10001<<(limit>>6);
limit=(limit+1)<<6; // Round up to the next block boundary.
minStart=limit; // Ignore further ranges in this block.
}
}
}
if(limit==0x10000) {
break;
}
start=list[listIndex++];
if(listIndex<listLength) {
limit=list[listIndex++];
} else {
limit=0x110000;
}
}
}
/*
* Override some bits and bytes to the result of contains(FFFD)
* for faster validity checking at runtime.
* No need to set 0 values where they were reset to 0 in the constructor
* and not modified by initBits().
* (asciiBytes[] trail bytes, table7FF[] 0..7F, bmpBlockBits[] 0..7FF)
* Need to set 0 values for surrogates D800..DFFF.
*/
void BMPSet::overrideIllegal() {
uint32_t bits, mask;
int32_t i;
if(containsSlow(0xfffd, list4kStarts[0xf], list4kStarts[0x10])) {
// contains(FFFD)==TRUE
for(i=0x80; i<0xc0; ++i) {
asciiBytes[i]=1;
}
bits=3; // Lead bytes 0xC0 and 0xC1.
for(i=0; i<64; ++i) {
table7FF[i]|=bits;
}
bits=1; // Lead byte 0xE0.
for(i=0; i<32; ++i) { // First half of 4k block.
bmpBlockBits[i]|=bits;
}
mask=~(0x10001<<0xd); // Lead byte 0xED.
bits=1<<0xd;
for(i=32; i<64; ++i) { // Second half of 4k block.
bmpBlockBits[i]=(bmpBlockBits[i]&mask)|bits;
}
} else {
// contains(FFFD)==FALSE
mask=~(0x10001<<0xd); // Lead byte 0xED.
for(i=32; i<64; ++i) { // Second half of 4k block.
bmpBlockBits[i]&=mask;
}
}
}
int32_t BMPSet::findCodePoint(UChar32 c, int32_t lo, int32_t hi) const {
/* Examples:
findCodePoint(c)
set list[] c=0 1 3 4 7 8
=== ============== ===========
[] [110000] 0 0 0 0 0 0
[\u0000-\u0003] [0, 4, 110000] 1 1 1 2 2 2
[\u0004-\u0007] [4, 8, 110000] 0 0 0 1 1 2
[:Any:] [0, 110000] 1 1 1 1 1 1
*/
// Return the smallest i such that c < list[i]. Assume
// list[len - 1] == HIGH and that c is legal (0..HIGH-1).
if (c < list[lo])
return lo;
// High runner test. c is often after the last range, so an
// initial check for this condition pays off.
if (lo >= hi || c >= list[hi-1])
return hi;
// invariant: c >= list[lo]
// invariant: c < list[hi]
for (;;) {
int32_t i = (lo + hi) >> 1;
if (i == lo) {
break; // Found!
} else if (c < list[i]) {
hi = i;
} else {
lo = i;
}
}
return hi;
}
UBool
BMPSet::contains(UChar32 c) const {
if((uint32_t)c<=0x7f) {
return (UBool)asciiBytes[c];
} else if((uint32_t)c<=0x7ff) {
return (UBool)((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0);
} else if((uint32_t)c<0xd800 || (c>=0xe000 && c<=0xffff)) {
int lead=c>>12;
uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;
if(twoBits<=1) {
// All 64 code points with the same bits 15..6
// are either in the set or not.
return (UBool)twoBits;
} else {
// Look up the code point in its 4k block of code points.
return containsSlow(c, list4kStarts[lead], list4kStarts[lead+1]);
}
} else if((uint32_t)c<=0x10ffff) {
// surrogate or supplementary code point
return containsSlow(c, list4kStarts[0xd], list4kStarts[0x11]);
} else {
// Out-of-range code points get FALSE, consistent with long-standing
// behavior of UnicodeSet::contains(c).
return FALSE;
}
}
/*
* Check for sufficient length for trail unit for each surrogate pair.
* Handle single surrogates as surrogate code points as usual in ICU.
*/
const UChar *
BMPSet::span(const UChar *s, const UChar *limit, USetSpanCondition spanCondition) const {
UChar c, c2;
if(spanCondition) {
// span
do {
c=*s;
if(c<=0x7f) {
if(!asciiBytes[c]) {
break;
}
} else if(c<=0x7ff) {
if((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))==0) {
break;
}
} else if(c<0xd800 || c>=0xe000) {
int lead=c>>12;
uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;
if(twoBits<=1) {
// All 64 code points with the same bits 15..6
// are either in the set or not.
if(twoBits==0) {
break;
}
} else {
// Look up the code point in its 4k block of code points.
if(!containsSlow(c, list4kStarts[lead], list4kStarts[lead+1])) {
break;
}
}
} else if(c>=0xdc00 || (s+1)==limit || (c2=s[1])<0xdc00 || c2>=0xe000) {
// surrogate code point
if(!containsSlow(c, list4kStarts[0xd], list4kStarts[0xe])) {
break;
}
} else {
// surrogate pair
if(!containsSlow(U16_GET_SUPPLEMENTARY(c, c2), list4kStarts[0x10], list4kStarts[0x11])) {
break;
}
++s;
}
} while(++s<limit);
} else {
// span not
do {
c=*s;
if(c<=0x7f) {
if(asciiBytes[c]) {
break;
}
} else if(c<=0x7ff) {
if((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0) {
break;
}
} else if(c<0xd800 || c>=0xe000) {
int lead=c>>12;
uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;
if(twoBits<=1) {
// All 64 code points with the same bits 15..6
// are either in the set or not.
if(twoBits!=0) {
break;
}
} else {
// Look up the code point in its 4k block of code points.
if(containsSlow(c, list4kStarts[lead], list4kStarts[lead+1])) {
break;
}
}
} else if(c>=0xdc00 || (s+1)==limit || (c2=s[1])<0xdc00 || c2>=0xe000) {
// surrogate code point
if(containsSlow(c, list4kStarts[0xd], list4kStarts[0xe])) {
break;
}
} else {
// surrogate pair
if(containsSlow(U16_GET_SUPPLEMENTARY(c, c2), list4kStarts[0x10], list4kStarts[0x11])) {
break;
}
++s;
}
} while(++s<limit);
}
return s;
}
/* Symmetrical with span(). */
const UChar *
BMPSet::spanBack(const UChar *s, const UChar *limit, USetSpanCondition spanCondition) const {
UChar c, c2;
if(spanCondition) {
// span
for(;;) {
c=*(--limit);
if(c<=0x7f) {
if(!asciiBytes[c]) {
break;
}
} else if(c<=0x7ff) {
if((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))==0) {
break;
}
} else if(c<0xd800 || c>=0xe000) {
int lead=c>>12;
uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;
if(twoBits<=1) {
// All 64 code points with the same bits 15..6
// are either in the set or not.
if(twoBits==0) {
break;
}
} else {
// Look up the code point in its 4k block of code points.
if(!containsSlow(c, list4kStarts[lead], list4kStarts[lead+1])) {
break;
}
}
} else if(c<0xdc00 || s==limit || (c2=*(limit-1))<0xd800 || c2>=0xdc00) {
// surrogate code point
if(!containsSlow(c, list4kStarts[0xd], list4kStarts[0xe])) {
break;
}
} else {
// surrogate pair
if(!containsSlow(U16_GET_SUPPLEMENTARY(c2, c), list4kStarts[0x10], list4kStarts[0x11])) {
break;
}
--limit;
}
if(s==limit) {
return s;
}
}
} else {
// span not
for(;;) {
c=*(--limit);
if(c<=0x7f) {
if(asciiBytes[c]) {
break;
}
} else if(c<=0x7ff) {
if((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0) {
break;
}
} else if(c<0xd800 || c>=0xe000) {
int lead=c>>12;
uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;
if(twoBits<=1) {
// All 64 code points with the same bits 15..6
// are either in the set or not.
if(twoBits!=0) {
break;
}
} else {
// Look up the code point in its 4k block of code points.
if(containsSlow(c, list4kStarts[lead], list4kStarts[lead+1])) {
break;
}
}
} else if(c<0xdc00 || s==limit || (c2=*(limit-1))<0xd800 || c2>=0xdc00) {
// surrogate code point
if(containsSlow(c, list4kStarts[0xd], list4kStarts[0xe])) {
break;
}
} else {
// surrogate pair
if(containsSlow(U16_GET_SUPPLEMENTARY(c2, c), list4kStarts[0x10], list4kStarts[0x11])) {
break;
}
--limit;
}
if(s==limit) {
return s;
}
}
}
return limit+1;
}
/*
* Precheck for sufficient trail bytes at end of string only once per span.
* Check validity.
*/
const uint8_t *
BMPSet::spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const {
const uint8_t *limit=s+length;
uint8_t b=*s;
if((int8_t)b>=0) {
// Initial all-ASCII span.
if(spanCondition) {
do {
if(!asciiBytes[b] || ++s==limit) {
return s;
}
b=*s;
} while((int8_t)b>=0);
} else {
do {
if(asciiBytes[b] || ++s==limit) {
return s;
}
b=*s;
} while((int8_t)b>=0);
}
length=(int32_t)(limit-s);
}
if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
spanCondition=USET_SPAN_CONTAINED; // Pin to 0/1 values.
}
const uint8_t *limit0=limit;
/*
* Make sure that the last 1/2/3/4-byte sequence before limit is complete
* or runs into a lead byte.
* In the span loop compare s with limit only once
* per multi-byte character.
*
* Give a trailing illegal sequence the same value as the result of contains(FFFD),
* including it if that is part of the span, otherwise set limit0 to before
* the truncated sequence.
*/
b=*(limit-1);
if((int8_t)b<0) {
// b>=0x80: lead or trail byte
if(b<0xc0) {
// single trail byte, check for preceding 3- or 4-byte lead byte
if(length>=2 && (b=*(limit-2))>=0xe0) {
limit-=2;
if(asciiBytes[0x80]!=spanCondition) {
limit0=limit;
}
} else if(b<0xc0 && b>=0x80 && length>=3 && (b=*(limit-3))>=0xf0) {
// 4-byte lead byte with only two trail bytes
limit-=3;
if(asciiBytes[0x80]!=spanCondition) {
limit0=limit;
}
}
} else {
// lead byte with no trail bytes
--limit;
if(asciiBytes[0x80]!=spanCondition) {
limit0=limit;
}
}
}
uint8_t t1, t2, t3;
while(s<limit) {
b=*s;
if(b<0xc0) {
// ASCII; or trail bytes with the result of contains(FFFD).
if(spanCondition) {
do {
if(!asciiBytes[b]) {
return s;
} else if(++s==limit) {
return limit0;
}
b=*s;
} while(b<0xc0);
} else {
do {
if(asciiBytes[b]) {
return s;
} else if(++s==limit) {
return limit0;
}
b=*s;
} while(b<0xc0);
}
}
++s; // Advance past the lead byte.
if(b>=0xe0) {
if(b<0xf0) {
if( /* handle U+0000..U+FFFF inline */
(t1=(uint8_t)(s[0]-0x80)) <= 0x3f &&
(t2=(uint8_t)(s[1]-0x80)) <= 0x3f
) {
b&=0xf;
uint32_t twoBits=(bmpBlockBits[t1]>>b)&0x10001;
if(twoBits<=1) {
// All 64 code points with this lead byte and middle trail byte
// are either in the set or not.
if(twoBits!=(uint32_t)spanCondition) {
return s-1;
}
} else {
// Look up the code point in its 4k block of code points.
UChar32 c=(b<<12)|(t1<<6)|t2;
if(containsSlow(c, list4kStarts[b], list4kStarts[b+1]) != spanCondition) {
return s-1;
}
}
s+=2;
continue;
}
} else if( /* handle U+10000..U+10FFFF inline */
(t1=(uint8_t)(s[0]-0x80)) <= 0x3f &&
(t2=(uint8_t)(s[1]-0x80)) <= 0x3f &&
(t3=(uint8_t)(s[2]-0x80)) <= 0x3f
) {
// Give an illegal sequence the same value as the result of contains(FFFD).
UChar32 c=((UChar32)(b-0xf0)<<18)|((UChar32)t1<<12)|(t2<<6)|t3;
if( ( (0x10000<=c && c<=0x10ffff) ?
containsSlow(c, list4kStarts[0x10], list4kStarts[0x11]) :
asciiBytes[0x80]
) != spanCondition
) {
return s-1;
}
s+=3;
continue;
}
} else /* 0xc0<=b<0xe0 */ {
if( /* handle U+0000..U+07FF inline */
(t1=(uint8_t)(*s-0x80)) <= 0x3f
) {
if((USetSpanCondition)((table7FF[t1]&((uint32_t)1<<(b&0x1f)))!=0) != spanCondition) {
return s-1;
}
++s;
continue;
}
}
// Give an illegal sequence the same value as the result of contains(FFFD).
// Handle each byte of an illegal sequence separately to simplify the code;
// no need to optimize error handling.
if(asciiBytes[0x80]!=spanCondition) {
return s-1;
}
}
return limit0;
}
/*
* While going backwards through UTF-8 optimize only for ASCII.
* Unlike UTF-16, UTF-8 is not forward-backward symmetrical, that is, it is not
* possible to tell from the last byte in a multi-byte sequence how many
* preceding bytes there should be. Therefore, going backwards through UTF-8
* is much harder than going forward.
*/
int32_t
BMPSet::spanBackUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const {
if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
spanCondition=USET_SPAN_CONTAINED; // Pin to 0/1 values.
}
uint8_t b;
do {
b=s[--length];
if((int8_t)b>=0) {
// ASCII sub-span
if(spanCondition) {
do {
if(!asciiBytes[b]) {
return length+1;
} else if(length==0) {
return 0;
}
b=s[--length];
} while((int8_t)b>=0);
} else {
do {
if(asciiBytes[b]) {
return length+1;
} else if(length==0) {
return 0;
}
b=s[--length];
} while((int8_t)b>=0);
}
}
int32_t prev=length;
UChar32 c;
// trail byte: collect a multi-byte character
// (or lead byte in last-trail position)
c=utf8_prevCharSafeBody(s, 0, &length, b, -3);
// c is a valid code point, not ASCII, not a surrogate
if(c<=0x7ff) {
if((USetSpanCondition)((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0) != spanCondition) {
return prev+1;
}
} else if(c<=0xffff) {
int lead=c>>12;
uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;
if(twoBits<=1) {
// All 64 code points with the same bits 15..6
// are either in the set or not.
if(twoBits!=(uint32_t)spanCondition) {
return prev+1;
}
} else {
// Look up the code point in its 4k block of code points.
if(containsSlow(c, list4kStarts[lead], list4kStarts[lead+1]) != spanCondition) {
return prev+1;
}
}
} else {
if(containsSlow(c, list4kStarts[0x10], list4kStarts[0x11]) != spanCondition) {
return prev+1;
}
}
} while(length>0);
return 0;
}
U_NAMESPACE_END

View File

@ -0,0 +1,161 @@
/*
******************************************************************************
*
* Copyright (C) 2007, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
* file name: bmpset.h
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2007jan29
* created by: Markus W. Scherer
*/
#ifndef __BMPSET_H__
#define __BMPSET_H__
#include "unicode/utypes.h"
#include "unicode/uniset.h"
U_NAMESPACE_BEGIN
/*
* Helper class for frozen UnicodeSets, implements contains() and span()
* optimized for BMP code points. Structured to be UTF-8-friendly.
*
* ASCII: Look up bytes.
* 2-byte characters: Bits organized vertically.
* 3-byte characters: Use zero/one/mixed data per 64-block in U+0000..U+FFFF,
* with mixed for illegal ranges.
* Supplementary characters: Call contains() on the parent set.
*/
class BMPSet : public UMemory {
public:
BMPSet(const int32_t *parentList, int32_t parentListLength);
BMPSet(const BMPSet &otherBMPSet, const int32_t *newParentList, int32_t newParentListLength);
virtual ~BMPSet();
virtual UBool contains(UChar32 c) const;
/*
* Span the initial substring for which each character c has spanCondition==contains(c).
* It must be s<limit and spanCondition==0 or 1.
* @return The string pointer which limits the span.
*/
const UChar *span(const UChar *s, const UChar *limit, USetSpanCondition spanCondition) const;
/*
* Span the trailing substring for which each character c has spanCondition==contains(c).
* It must be s<limit and spanCondition==0 or 1.
* @return The string pointer which starts the span.
*/
const UChar *spanBack(const UChar *s, const UChar *limit, USetSpanCondition spanCondition) const;
/*
* Span the initial substring for which each character c has spanCondition==contains(c).
* It must be length>0 and spanCondition==0 or 1.
* @return The string pointer which limits the span.
*/
const uint8_t *spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const;
/*
* Span the trailing substring for which each character c has spanCondition==contains(c).
* It must be length>0 and spanCondition==0 or 1.
* @return The start of the span.
*/
int32_t spanBackUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const;
private:
void initBits();
void overrideIllegal();
/**
* Same as UnicodeSet::findCodePoint(UChar32 c) const except that the
* binary search is restricted for finding code points in a certain range.
*
* For restricting the search for finding in the range start..end,
* pass in
* lo=findCodePoint(start) and
* hi=findCodePoint(end)
* with 0<=lo<=hi<len.
* findCodePoint(c) defaults to lo=0 and hi=len-1.
*
* @param c a character in a subrange of MIN_VALUE..MAX_VALUE
* @param lo The lowest index to be returned.
* @param hi The highest index to be returned.
* @return the smallest integer i in the range lo..hi,
* inclusive, such that c < list[i]
*/
int32_t findCodePoint(UChar32 c, int32_t lo, int32_t hi) const;
inline UBool containsSlow(UChar32 c, int32_t lo, int32_t hi) const;
/*
* One byte per ASCII character, or trail byte in lead position.
* 0 or 1 for ASCII characters.
* The value for trail bytes is the result of contains(FFFD)
* for faster validity checking at runtime.
*/
UBool asciiBytes[0xc0];
/*
* One bit per code point from U+0000..U+07FF.
* The bits are organized vertically; consecutive code points
* correspond to the same bit positions in consecutive table words.
* With code point parts
* lead=c{10..6}
* trail=c{5..0}
* it is set.contains(c)==(table7FF[trail] bit lead)
*
* Bits for 0..7F (non-shortest forms) are set to the result of contains(FFFD)
* for faster validity checking at runtime.
*/
uint32_t table7FF[64];
/*
* One bit per 64 BMP code points.
* The bits are organized vertically; consecutive 64-code point blocks
* correspond to the same bit position in consecutive table words.
* With code point parts
* lead=c{15..12}
* t1=c{11..6}
* test bits (lead+16) and lead in bmpBlockBits[t1].
* If the upper bit is 0, then the lower bit indicates if contains(c)
* for all code points in the 64-block.
* If the upper bit is 1, then the block is mixed and set.contains(c)
* must be called.
*
* Bits for 0..7FF (non-shortest forms) and D800..DFFF are set to
* the result of contains(FFFD) for faster validity checking at runtime.
*/
uint32_t bmpBlockBits[64];
/*
* Inversion list indexes for restricted binary searches in
* findCodePoint(), from
* findCodePoint(U+0800, U+1000, U+2000, .., U+F000, U+10000).
* U+0800 is the first 3-byte-UTF-8 code point. Code points below U+0800 are
* always looked up in the bit tables.
* The last pair of indexes is for finding supplementary code points.
*/
int32_t list4kStarts[18];
/*
* The inversion list of the parent set, for the slower contains() implementation
* for mixed BMP blocks and for supplementary code points.
* The list is terminated with list[listLength-1]=0x110000.
*/
const int32_t *list;
int32_t listLength;
};
inline UBool BMPSet::containsSlow(UChar32 c, int32_t lo, int32_t hi) const {
return (UBool)(findCodePoint(c, lo, hi) & 1);
}
U_NAMESPACE_END
#endif

View File

@ -0,0 +1,300 @@
/*
************************************************************************************
* Copyright (C) 2006-2015, International Business Machines Corporation
* and others. All Rights Reserved.
************************************************************************************
*/
#include "unicode/utypes.h"
#if !UCONFIG_NO_BREAK_ITERATION
#include "brkeng.h"
#include "dictbe.h"
#include "unicode/uchar.h"
#include "unicode/uniset.h"
#include "unicode/chariter.h"
#include "unicode/ures.h"
#include "unicode/udata.h"
#include "unicode/putil.h"
#include "unicode/ustring.h"
#include "unicode/uscript.h"
#include "unicode/ucharstrie.h"
#include "unicode/bytestrie.h"
#include "charstr.h"
#include "dictionarydata.h"
#include "mutex.h"
#include "uvector.h"
#include "umutex.h"
#include "uresimp.h"
#include "ubrkimpl.h"
U_NAMESPACE_BEGIN
/*
******************************************************************
*/
LanguageBreakEngine::LanguageBreakEngine() {
}
LanguageBreakEngine::~LanguageBreakEngine() {
}
/*
******************************************************************
*/
LanguageBreakFactory::LanguageBreakFactory() {
}
LanguageBreakFactory::~LanguageBreakFactory() {
}
/*
******************************************************************
*/
UnhandledEngine::UnhandledEngine(UErrorCode &/*status*/) {
for (int32_t i = 0; i < (int32_t)(sizeof(fHandled)/sizeof(fHandled[0])); ++i) {
fHandled[i] = 0;
}
}
UnhandledEngine::~UnhandledEngine() {
for (int32_t i = 0; i < (int32_t)(sizeof(fHandled)/sizeof(fHandled[0])); ++i) {
if (fHandled[i] != 0) {
delete fHandled[i];
}
}
}
UBool
UnhandledEngine::handles(UChar32 c, int32_t breakType) const {
return (breakType >= 0 && breakType < (int32_t)(sizeof(fHandled)/sizeof(fHandled[0]))
&& fHandled[breakType] != 0 && fHandled[breakType]->contains(c));
}
int32_t
UnhandledEngine::findBreaks( UText *text,
int32_t startPos,
int32_t endPos,
UBool reverse,
int32_t breakType,
UStack &/*foundBreaks*/ ) const {
if (breakType >= 0 && breakType < (int32_t)(sizeof(fHandled)/sizeof(fHandled[0]))) {
UChar32 c = utext_current32(text);
if (reverse) {
while((int32_t)utext_getNativeIndex(text) > startPos && fHandled[breakType]->contains(c)) {
c = utext_previous32(text);
}
}
else {
while((int32_t)utext_getNativeIndex(text) < endPos && fHandled[breakType]->contains(c)) {
utext_next32(text); // TODO: recast loop to work with post-increment operations.
c = utext_current32(text);
}
}
}
return 0;
}
void
UnhandledEngine::handleCharacter(UChar32 c, int32_t breakType) {
if (breakType >= 0 && breakType < (int32_t)(sizeof(fHandled)/sizeof(fHandled[0]))) {
if (fHandled[breakType] == 0) {
fHandled[breakType] = new UnicodeSet();
if (fHandled[breakType] == 0) {
return;
}
}
if (!fHandled[breakType]->contains(c)) {
UErrorCode status = U_ZERO_ERROR;
// Apply the entire script of the character.
int32_t script = u_getIntPropertyValue(c, UCHAR_SCRIPT);
fHandled[breakType]->applyIntPropertyValue(UCHAR_SCRIPT, script, status);
}
}
}
/*
******************************************************************
*/
ICULanguageBreakFactory::ICULanguageBreakFactory(UErrorCode &/*status*/) {
fEngines = 0;
}
ICULanguageBreakFactory::~ICULanguageBreakFactory() {
if (fEngines != 0) {
delete fEngines;
}
}
U_NAMESPACE_END
U_CDECL_BEGIN
static void U_CALLCONV _deleteEngine(void *obj) {
delete (const icu::LanguageBreakEngine *) obj;
}
U_CDECL_END
U_NAMESPACE_BEGIN
static UMutex gBreakEngineMutex = U_MUTEX_INITIALIZER;
const LanguageBreakEngine *
ICULanguageBreakFactory::getEngineFor(UChar32 c, int32_t breakType) {
const LanguageBreakEngine *lbe = NULL;
UErrorCode status = U_ZERO_ERROR;
Mutex m(&gBreakEngineMutex);
if (fEngines == NULL) {
UStack *engines = new UStack(_deleteEngine, NULL, status);
if (U_FAILURE(status) || engines == NULL) {
// Note: no way to return error code to caller.
delete engines;
return NULL;
}
fEngines = engines;
} else {
int32_t i = fEngines->size();
while (--i >= 0) {
lbe = (const LanguageBreakEngine *)(fEngines->elementAt(i));
if (lbe != NULL && lbe->handles(c, breakType)) {
return lbe;
}
}
}
// We didn't find an engine. Create one.
lbe = loadEngineFor(c, breakType);
if (lbe != NULL) {
fEngines->push((void *)lbe, status);
}
return lbe;
}
const LanguageBreakEngine *
ICULanguageBreakFactory::loadEngineFor(UChar32 c, int32_t breakType) {
UErrorCode status = U_ZERO_ERROR;
UScriptCode code = uscript_getScript(c, &status);
if (U_SUCCESS(status)) {
DictionaryMatcher *m = loadDictionaryMatcherFor(code, breakType);
if (m != NULL) {
const LanguageBreakEngine *engine = NULL;
switch(code) {
case USCRIPT_THAI:
engine = new ThaiBreakEngine(m, status);
break;
case USCRIPT_LAO:
engine = new LaoBreakEngine(m, status);
break;
case USCRIPT_MYANMAR:
engine = new BurmeseBreakEngine(m, status);
break;
case USCRIPT_KHMER:
engine = new KhmerBreakEngine(m, status);
break;
#if !UCONFIG_NO_NORMALIZATION
// CJK not available w/o normalization
case USCRIPT_HANGUL:
engine = new CjkBreakEngine(m, kKorean, status);
break;
// use same BreakEngine and dictionary for both Chinese and Japanese
case USCRIPT_HIRAGANA:
case USCRIPT_KATAKANA:
case USCRIPT_HAN:
engine = new CjkBreakEngine(m, kChineseJapanese, status);
break;
#if 0
// TODO: Have to get some characters with script=common handled
// by CjkBreakEngine (e.g. U+309B). Simply subjecting
// them to CjkBreakEngine does not work. The engine has to
// special-case them.
case USCRIPT_COMMON:
{
UBlockCode block = ublock_getCode(code);
if (block == UBLOCK_HIRAGANA || block == UBLOCK_KATAKANA)
engine = new CjkBreakEngine(dict, kChineseJapanese, status);
break;
}
#endif
#endif
default:
break;
}
if (engine == NULL) {
delete m;
}
else if (U_FAILURE(status)) {
delete engine;
engine = NULL;
}
return engine;
}
}
return NULL;
}
DictionaryMatcher *
ICULanguageBreakFactory::loadDictionaryMatcherFor(UScriptCode script, int32_t /* brkType */) {
UErrorCode status = U_ZERO_ERROR;
// open root from brkitr tree.
UResourceBundle *b = ures_open(U_ICUDATA_BRKITR, "", &status);
b = ures_getByKeyWithFallback(b, "dictionaries", b, &status);
int32_t dictnlength = 0;
const UChar *dictfname =
ures_getStringByKeyWithFallback(b, uscript_getShortName(script), &dictnlength, &status);
if (U_FAILURE(status)) {
ures_close(b);
return NULL;
}
CharString dictnbuf;
CharString ext;
const UChar *extStart = u_memrchr(dictfname, 0x002e, dictnlength); // last dot
if (extStart != NULL) {
int32_t len = (int32_t)(extStart - dictfname);
ext.appendInvariantChars(UnicodeString(FALSE, extStart + 1, dictnlength - len - 1), status);
dictnlength = len;
}
dictnbuf.appendInvariantChars(UnicodeString(FALSE, dictfname, dictnlength), status);
ures_close(b);
UDataMemory *file = udata_open(U_ICUDATA_BRKITR, ext.data(), dictnbuf.data(), &status);
if (U_SUCCESS(status)) {
// build trie
const uint8_t *data = (const uint8_t *)udata_getMemory(file);
const int32_t *indexes = (const int32_t *)data;
const int32_t offset = indexes[DictionaryData::IX_STRING_TRIE_OFFSET];
const int32_t trieType = indexes[DictionaryData::IX_TRIE_TYPE] & DictionaryData::TRIE_TYPE_MASK;
DictionaryMatcher *m = NULL;
if (trieType == DictionaryData::TRIE_TYPE_BYTES) {
const int32_t transform = indexes[DictionaryData::IX_TRANSFORM];
const char *characters = (const char *)(data + offset);
m = new BytesDictionaryMatcher(characters, transform, file);
}
else if (trieType == DictionaryData::TRIE_TYPE_UCHARS) {
const UChar *characters = (const UChar *)(data + offset);
m = new UCharsDictionaryMatcher(characters, file);
}
if (m == NULL) {
// no matcher exists to take ownership - either we are an invalid
// type or memory allocation failed
udata_close(file);
}
return m;
} else if (dictfname != NULL) {
// we don't have a dictionary matcher.
// returning NULL here will cause us to fail to find a dictionary break engine, as expected
status = U_ZERO_ERROR;
return NULL;
}
return NULL;
}
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */

View File

@ -0,0 +1,289 @@
/**
************************************************************************************
* Copyright (C) 2006-2012, International Business Machines Corporation and others. *
* All Rights Reserved. *
************************************************************************************
*/
#ifndef BRKENG_H
#define BRKENG_H
#include "unicode/utypes.h"
#include "unicode/uobject.h"
#include "unicode/utext.h"
#include "unicode/uscript.h"
U_NAMESPACE_BEGIN
class UnicodeSet;
class UStack;
class DictionaryMatcher;
/*******************************************************************
* LanguageBreakEngine
*/
/**
* <p>LanguageBreakEngines implement language-specific knowledge for
* finding text boundaries within a run of characters belonging to a
* specific set. The boundaries will be of a specific kind, e.g. word,
* line, etc.</p>
*
* <p>LanguageBreakEngines should normally be implemented so as to
* be shared between threads without locking.</p>
*/
class LanguageBreakEngine : public UMemory {
public:
/**
* <p>Default constructor.</p>
*
*/
LanguageBreakEngine();
/**
* <p>Virtual destructor.</p>
*/
virtual ~LanguageBreakEngine();
/**
* <p>Indicate whether this engine handles a particular character for
* a particular kind of break.</p>
*
* @param c A character which begins a run that the engine might handle
* @param breakType The type of text break which the caller wants to determine
* @return TRUE if this engine handles the particular character and break
* type.
*/
virtual UBool handles(UChar32 c, int32_t breakType) const = 0;
/**
* <p>Find any breaks within a run in the supplied text.</p>
*
* @param text A UText representing the text. The
* iterator is left at the end of the run of characters which the engine
* is capable of handling.
* @param startPos The start of the run within the supplied text.
* @param endPos The end of the run within the supplied text.
* @param reverse Whether the caller is looking for breaks in a reverse
* direction.
* @param breakType The type of break desired, or -1.
* @param foundBreaks An allocated C array of the breaks found, if any
* @return The number of breaks found.
*/
virtual int32_t findBreaks( UText *text,
int32_t startPos,
int32_t endPos,
UBool reverse,
int32_t breakType,
UStack &foundBreaks ) const = 0;
};
/*******************************************************************
* LanguageBreakFactory
*/
/**
* <p>LanguageBreakFactorys find and return a LanguageBreakEngine
* that can determine breaks for characters in a specific set, if
* such an object can be found.</p>
*
* <p>If a LanguageBreakFactory is to be shared between threads,
* appropriate synchronization must be used; there is none internal
* to the factory.</p>
*
* <p>A LanguageBreakEngine returned by a LanguageBreakFactory can
* normally be shared between threads without synchronization, unless
* the specific subclass of LanguageBreakFactory indicates otherwise.</p>
*
* <p>A LanguageBreakFactory is responsible for deleting any LanguageBreakEngine
* it returns when it itself is deleted, unless the specific subclass of
* LanguageBreakFactory indicates otherwise. Naturally, the factory should
* not be deleted until the LanguageBreakEngines it has returned are no
* longer needed.</p>
*/
class LanguageBreakFactory : public UMemory {
public:
/**
* <p>Default constructor.</p>
*
*/
LanguageBreakFactory();
/**
* <p>Virtual destructor.</p>
*/
virtual ~LanguageBreakFactory();
/**
* <p>Find and return a LanguageBreakEngine that can find the desired
* kind of break for the set of characters to which the supplied
* character belongs. It is up to the set of available engines to
* determine what the sets of characters are.</p>
*
* @param c A character that begins a run for which a LanguageBreakEngine is
* sought.
* @param breakType The kind of text break for which a LanguageBreakEngine is
* sought.
* @return A LanguageBreakEngine with the desired characteristics, or 0.
*/
virtual const LanguageBreakEngine *getEngineFor(UChar32 c, int32_t breakType) = 0;
};
/*******************************************************************
* UnhandledEngine
*/
/**
* <p>UnhandledEngine is a special subclass of LanguageBreakEngine that
* handles characters that no other LanguageBreakEngine is available to
* handle. It is told the character and the type of break; at its
* discretion it may handle more than the specified character (e.g.,
* the entire script to which that character belongs.</p>
*
* <p>UnhandledEngines may not be shared between threads without
* external synchronization.</p>
*/
class UnhandledEngine : public LanguageBreakEngine {
private:
/**
* The sets of characters handled, for each break type
* @internal
*/
UnicodeSet *fHandled[4];
public:
/**
* <p>Default constructor.</p>
*
*/
UnhandledEngine(UErrorCode &status);
/**
* <p>Virtual destructor.</p>
*/
virtual ~UnhandledEngine();
/**
* <p>Indicate whether this engine handles a particular character for
* a particular kind of break.</p>
*
* @param c A character which begins a run that the engine might handle
* @param breakType The type of text break which the caller wants to determine
* @return TRUE if this engine handles the particular character and break
* type.
*/
virtual UBool handles(UChar32 c, int32_t breakType) const;
/**
* <p>Find any breaks within a run in the supplied text.</p>
*
* @param text A UText representing the text (TODO: UText). The
* iterator is left at the end of the run of characters which the engine
* is capable of handling.
* @param startPos The start of the run within the supplied text.
* @param endPos The end of the run within the supplied text.
* @param reverse Whether the caller is looking for breaks in a reverse
* direction.
* @param breakType The type of break desired, or -1.
* @param foundBreaks An allocated C array of the breaks found, if any
* @return The number of breaks found.
*/
virtual int32_t findBreaks( UText *text,
int32_t startPos,
int32_t endPos,
UBool reverse,
int32_t breakType,
UStack &foundBreaks ) const;
/**
* <p>Tell the engine to handle a particular character and break type.</p>
*
* @param c A character which the engine should handle
* @param breakType The type of text break for which the engine should handle c
*/
virtual void handleCharacter(UChar32 c, int32_t breakType);
};
/*******************************************************************
* ICULanguageBreakFactory
*/
/**
* <p>ICULanguageBreakFactory is the default LanguageBreakFactory for
* ICU. It creates dictionary-based LanguageBreakEngines from dictionary
* data in the ICU data file.</p>
*/
class ICULanguageBreakFactory : public LanguageBreakFactory {
private:
/**
* The stack of break engines created by this factory
* @internal
*/
UStack *fEngines;
public:
/**
* <p>Standard constructor.</p>
*
*/
ICULanguageBreakFactory(UErrorCode &status);
/**
* <p>Virtual destructor.</p>
*/
virtual ~ICULanguageBreakFactory();
/**
* <p>Find and return a LanguageBreakEngine that can find the desired
* kind of break for the set of characters to which the supplied
* character belongs. It is up to the set of available engines to
* determine what the sets of characters are.</p>
*
* @param c A character that begins a run for which a LanguageBreakEngine is
* sought.
* @param breakType The kind of text break for which a LanguageBreakEngine is
* sought.
* @return A LanguageBreakEngine with the desired characteristics, or 0.
*/
virtual const LanguageBreakEngine *getEngineFor(UChar32 c, int32_t breakType);
protected:
/**
* <p>Create a LanguageBreakEngine for the set of characters to which
* the supplied character belongs, for the specified break type.</p>
*
* @param c A character that begins a run for which a LanguageBreakEngine is
* sought.
* @param breakType The kind of text break for which a LanguageBreakEngine is
* sought.
* @return A LanguageBreakEngine with the desired characteristics, or 0.
*/
virtual const LanguageBreakEngine *loadEngineFor(UChar32 c, int32_t breakType);
/**
* <p>Create a DictionaryMatcher for the specified script and break type.</p>
* @param script An ISO 15924 script code that identifies the dictionary to be
* created.
* @param breakType The kind of text break for which a dictionary is
* sought.
* @return A DictionaryMatcher with the desired characteristics, or NULL.
*/
virtual DictionaryMatcher *loadDictionaryMatcherFor(UScriptCode script, int32_t breakType);
};
U_NAMESPACE_END
/* BRKENG_H */
#endif

View File

@ -0,0 +1,492 @@
/*
*******************************************************************************
* Copyright (C) 1997-2015, International Business Machines Corporation and
* others. All Rights Reserved.
*******************************************************************************
*
* File brkiter.cpp
*
* Modification History:
*
* Date Name Description
* 02/18/97 aliu Converted from OpenClass. Added DONE.
* 01/13/2000 helena Added UErrorCode parameter to createXXXInstance methods.
*****************************************************************************************
*/
// *****************************************************************************
// This file was generated from the java source file BreakIterator.java
// *****************************************************************************
#include "unicode/utypes.h"
#if !UCONFIG_NO_BREAK_ITERATION
#include "unicode/rbbi.h"
#include "unicode/brkiter.h"
#include "unicode/udata.h"
#include "unicode/ures.h"
#include "unicode/ustring.h"
#include "unicode/filteredbrk.h"
#include "ucln_cmn.h"
#include "cstring.h"
#include "umutex.h"
#include "servloc.h"
#include "locbased.h"
#include "uresimp.h"
#include "uassert.h"
#include "ubrkimpl.h"
#include "charstr.h"
// *****************************************************************************
// class BreakIterator
// This class implements methods for finding the location of boundaries in text.
// Instances of BreakIterator maintain a current position and scan over text
// returning the index of characters where boundaries occur.
// *****************************************************************************
U_NAMESPACE_BEGIN
// -------------------------------------
BreakIterator*
BreakIterator::buildInstance(const Locale& loc, const char *type, int32_t kind, UErrorCode &status)
{
char fnbuff[256];
char ext[4]={'\0'};
CharString actualLocale;
int32_t size;
const UChar* brkfname = NULL;
UResourceBundle brkRulesStack;
UResourceBundle brkNameStack;
UResourceBundle *brkRules = &brkRulesStack;
UResourceBundle *brkName = &brkNameStack;
RuleBasedBreakIterator *result = NULL;
if (U_FAILURE(status))
return NULL;
ures_initStackObject(brkRules);
ures_initStackObject(brkName);
// Get the locale
UResourceBundle *b = ures_openNoDefault(U_ICUDATA_BRKITR, loc.getName(), &status);
// Get the "boundaries" array.
if (U_SUCCESS(status)) {
brkRules = ures_getByKeyWithFallback(b, "boundaries", brkRules, &status);
// Get the string object naming the rules file
brkName = ures_getByKeyWithFallback(brkRules, type, brkName, &status);
// Get the actual string
brkfname = ures_getString(brkName, &size, &status);
U_ASSERT((size_t)size<sizeof(fnbuff));
if ((size_t)size>=sizeof(fnbuff)) {
size=0;
if (U_SUCCESS(status)) {
status = U_BUFFER_OVERFLOW_ERROR;
}
}
// Use the string if we found it
if (U_SUCCESS(status) && brkfname) {
actualLocale.append(ures_getLocaleInternal(brkName, &status), -1, status);
UChar* extStart=u_strchr(brkfname, 0x002e);
int len = 0;
if(extStart!=NULL){
len = (int)(extStart-brkfname);
u_UCharsToChars(extStart+1, ext, sizeof(ext)); // nul terminates the buff
u_UCharsToChars(brkfname, fnbuff, len);
}
fnbuff[len]=0; // nul terminate
}
}
ures_close(brkRules);
ures_close(brkName);
UDataMemory* file = udata_open(U_ICUDATA_BRKITR, ext, fnbuff, &status);
if (U_FAILURE(status)) {
ures_close(b);
return NULL;
}
// Create a RuleBasedBreakIterator
result = new RuleBasedBreakIterator(file, status);
// If there is a result, set the valid locale and actual locale, and the kind
if (U_SUCCESS(status) && result != NULL) {
U_LOCALE_BASED(locBased, *(BreakIterator*)result);
locBased.setLocaleIDs(ures_getLocaleByType(b, ULOC_VALID_LOCALE, &status),
actualLocale.data());
result->setBreakType(kind);
}
ures_close(b);
if (U_FAILURE(status) && result != NULL) { // Sometimes redundant check, but simple
delete result;
return NULL;
}
if (result == NULL) {
udata_close(file);
if (U_SUCCESS(status)) {
status = U_MEMORY_ALLOCATION_ERROR;
}
}
return result;
}
// Creates a break iterator for word breaks.
BreakIterator* U_EXPORT2
BreakIterator::createWordInstance(const Locale& key, UErrorCode& status)
{
return createInstance(key, UBRK_WORD, status);
}
// -------------------------------------
// Creates a break iterator for line breaks.
BreakIterator* U_EXPORT2
BreakIterator::createLineInstance(const Locale& key, UErrorCode& status)
{
return createInstance(key, UBRK_LINE, status);
}
// -------------------------------------
// Creates a break iterator for character breaks.
BreakIterator* U_EXPORT2
BreakIterator::createCharacterInstance(const Locale& key, UErrorCode& status)
{
return createInstance(key, UBRK_CHARACTER, status);
}
// -------------------------------------
// Creates a break iterator for sentence breaks.
BreakIterator* U_EXPORT2
BreakIterator::createSentenceInstance(const Locale& key, UErrorCode& status)
{
return createInstance(key, UBRK_SENTENCE, status);
}
// -------------------------------------
// Creates a break iterator for title casing breaks.
BreakIterator* U_EXPORT2
BreakIterator::createTitleInstance(const Locale& key, UErrorCode& status)
{
return createInstance(key, UBRK_TITLE, status);
}
// -------------------------------------
// Gets all the available locales that has localized text boundary data.
const Locale* U_EXPORT2
BreakIterator::getAvailableLocales(int32_t& count)
{
return Locale::getAvailableLocales(count);
}
// ------------------------------------------
//
// Default constructor and destructor
//
//-------------------------------------------
BreakIterator::BreakIterator()
{
*validLocale = *actualLocale = 0;
}
BreakIterator::~BreakIterator()
{
}
// ------------------------------------------
//
// Registration
//
//-------------------------------------------
#if !UCONFIG_NO_SERVICE
// -------------------------------------
class ICUBreakIteratorFactory : public ICUResourceBundleFactory {
public:
virtual ~ICUBreakIteratorFactory();
protected:
virtual UObject* handleCreate(const Locale& loc, int32_t kind, const ICUService* /*service*/, UErrorCode& status) const {
return BreakIterator::makeInstance(loc, kind, status);
}
};
ICUBreakIteratorFactory::~ICUBreakIteratorFactory() {}
// -------------------------------------
class ICUBreakIteratorService : public ICULocaleService {
public:
ICUBreakIteratorService()
: ICULocaleService(UNICODE_STRING("Break Iterator", 14))
{
UErrorCode status = U_ZERO_ERROR;
registerFactory(new ICUBreakIteratorFactory(), status);
}
virtual ~ICUBreakIteratorService();
virtual UObject* cloneInstance(UObject* instance) const {
return ((BreakIterator*)instance)->clone();
}
virtual UObject* handleDefault(const ICUServiceKey& key, UnicodeString* /*actualID*/, UErrorCode& status) const {
LocaleKey& lkey = (LocaleKey&)key;
int32_t kind = lkey.kind();
Locale loc;
lkey.currentLocale(loc);
return BreakIterator::makeInstance(loc, kind, status);
}
virtual UBool isDefault() const {
return countFactories() == 1;
}
};
ICUBreakIteratorService::~ICUBreakIteratorService() {}
// -------------------------------------
// defined in ucln_cmn.h
U_NAMESPACE_END
static icu::UInitOnce gInitOnce;
static icu::ICULocaleService* gService = NULL;
/**
* Release all static memory held by breakiterator.
*/
U_CDECL_BEGIN
static UBool U_CALLCONV breakiterator_cleanup(void) {
#if !UCONFIG_NO_SERVICE
if (gService) {
delete gService;
gService = NULL;
}
gInitOnce.reset();
#endif
return TRUE;
}
U_CDECL_END
U_NAMESPACE_BEGIN
static void U_CALLCONV
initService(void) {
gService = new ICUBreakIteratorService();
ucln_common_registerCleanup(UCLN_COMMON_BREAKITERATOR, breakiterator_cleanup);
}
static ICULocaleService*
getService(void)
{
umtx_initOnce(gInitOnce, &initService);
return gService;
}
// -------------------------------------
static inline UBool
hasService(void)
{
return !gInitOnce.isReset() && getService() != NULL;
}
// -------------------------------------
URegistryKey U_EXPORT2
BreakIterator::registerInstance(BreakIterator* toAdopt, const Locale& locale, UBreakIteratorType kind, UErrorCode& status)
{
ICULocaleService *service = getService();
if (service == NULL) {
status = U_MEMORY_ALLOCATION_ERROR;
return NULL;
}
return service->registerInstance(toAdopt, locale, kind, status);
}
// -------------------------------------
UBool U_EXPORT2
BreakIterator::unregister(URegistryKey key, UErrorCode& status)
{
if (U_SUCCESS(status)) {
if (hasService()) {
return gService->unregister(key, status);
}
status = U_MEMORY_ALLOCATION_ERROR;
}
return FALSE;
}
// -------------------------------------
StringEnumeration* U_EXPORT2
BreakIterator::getAvailableLocales(void)
{
ICULocaleService *service = getService();
if (service == NULL) {
return NULL;
}
return service->getAvailableLocales();
}
#endif /* UCONFIG_NO_SERVICE */
// -------------------------------------
BreakIterator*
BreakIterator::createInstance(const Locale& loc, int32_t kind, UErrorCode& status)
{
if (U_FAILURE(status)) {
return NULL;
}
#if !UCONFIG_NO_SERVICE
if (hasService()) {
Locale actualLoc("");
BreakIterator *result = (BreakIterator*)gService->get(loc, kind, &actualLoc, status);
// TODO: The way the service code works in ICU 2.8 is that if
// there is a real registered break iterator, the actualLoc
// will be populated, but if the handleDefault path is taken
// (because nothing is registered that can handle the
// requested locale) then the actualLoc comes back empty. In
// that case, the returned object already has its actual/valid
// locale data populated (by makeInstance, which is what
// handleDefault calls), so we don't touch it. YES, A COMMENT
// THIS LONG is a sign of bad code -- so the action item is to
// revisit this in ICU 3.0 and clean it up/fix it/remove it.
if (U_SUCCESS(status) && (result != NULL) && *actualLoc.getName() != 0) {
U_LOCALE_BASED(locBased, *result);
locBased.setLocaleIDs(actualLoc.getName(), actualLoc.getName());
}
return result;
}
else
#endif
{
return makeInstance(loc, kind, status);
}
}
// -------------------------------------
enum { kKeyValueLenMax = 32 };
BreakIterator*
BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status)
{
if (U_FAILURE(status)) {
return NULL;
}
char lbType[kKeyValueLenMax];
BreakIterator *result = NULL;
switch (kind) {
case UBRK_CHARACTER:
result = BreakIterator::buildInstance(loc, "grapheme", kind, status);
break;
case UBRK_WORD:
result = BreakIterator::buildInstance(loc, "word", kind, status);
break;
case UBRK_LINE:
uprv_strcpy(lbType, "line");
{
char lbKeyValue[kKeyValueLenMax] = {0};
UErrorCode kvStatus = U_ZERO_ERROR;
int32_t kLen = loc.getKeywordValue("lb", lbKeyValue, kKeyValueLenMax, kvStatus);
if (U_SUCCESS(kvStatus) && kLen > 0 && (uprv_strcmp(lbKeyValue,"strict")==0 || uprv_strcmp(lbKeyValue,"normal")==0 || uprv_strcmp(lbKeyValue,"loose")==0)) {
uprv_strcat(lbType, "_");
uprv_strcat(lbType, lbKeyValue);
}
}
result = BreakIterator::buildInstance(loc, lbType, kind, status);
break;
case UBRK_SENTENCE:
result = BreakIterator::buildInstance(loc, "sentence", kind, status);
{
char ssKeyValue[kKeyValueLenMax] = {0};
UErrorCode kvStatus = U_ZERO_ERROR;
int32_t kLen = loc.getKeywordValue("ss", ssKeyValue, kKeyValueLenMax, kvStatus);
if (U_SUCCESS(kvStatus) && kLen > 0 && uprv_strcmp(ssKeyValue,"standard")==0) {
FilteredBreakIteratorBuilder* fbiBuilder = FilteredBreakIteratorBuilder::createInstance(loc, kvStatus);
if (U_SUCCESS(kvStatus)) {
result = fbiBuilder->build(result, status);
delete fbiBuilder;
}
}
}
break;
case UBRK_TITLE:
result = BreakIterator::buildInstance(loc, "title", kind, status);
break;
default:
status = U_ILLEGAL_ARGUMENT_ERROR;
}
if (U_FAILURE(status)) {
return NULL;
}
return result;
}
Locale
BreakIterator::getLocale(ULocDataLocaleType type, UErrorCode& status) const {
U_LOCALE_BASED(locBased, *this);
return locBased.getLocale(type, status);
}
const char *
BreakIterator::getLocaleID(ULocDataLocaleType type, UErrorCode& status) const {
U_LOCALE_BASED(locBased, *this);
return locBased.getLocaleID(type, status);
}
// This implementation of getRuleStatus is a do-nothing stub, here to
// provide a default implementation for any derived BreakIterator classes that
// do not implement it themselves.
int32_t BreakIterator::getRuleStatus() const {
return 0;
}
// This implementation of getRuleStatusVec is a do-nothing stub, here to
// provide a default implementation for any derived BreakIterator classes that
// do not implement it themselves.
int32_t BreakIterator::getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status) {
if (U_FAILURE(status)) {
return 0;
}
if (capacity < 1) {
status = U_BUFFER_OVERFLOW_ERROR;
return 1;
}
*fillInVec = 0;
return 1;
}
BreakIterator::BreakIterator (const Locale& valid, const Locale& actual) {
U_LOCALE_BASED(locBased, (*this));
locBased.setLocaleIDs(valid, actual);
}
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
//eof

View File

@ -0,0 +1,77 @@
// Copyright (C) 2009-2011, International Business Machines
// Corporation and others. All Rights Reserved.
//
// Copyright 2007 Google Inc. All Rights Reserved.
// Author: sanjay@google.com (Sanjay Ghemawat)
#include "unicode/utypes.h"
#include "unicode/bytestream.h"
#include "cmemory.h"
U_NAMESPACE_BEGIN
ByteSink::~ByteSink() {}
char* ByteSink::GetAppendBuffer(int32_t min_capacity,
int32_t /*desired_capacity_hint*/,
char* scratch, int32_t scratch_capacity,
int32_t* result_capacity) {
if (min_capacity < 1 || scratch_capacity < min_capacity) {
*result_capacity = 0;
return NULL;
}
*result_capacity = scratch_capacity;
return scratch;
}
void ByteSink::Flush() {}
CheckedArrayByteSink::CheckedArrayByteSink(char* outbuf, int32_t capacity)
: outbuf_(outbuf), capacity_(capacity < 0 ? 0 : capacity),
size_(0), appended_(0), overflowed_(FALSE) {
}
CheckedArrayByteSink::~CheckedArrayByteSink() {}
CheckedArrayByteSink& CheckedArrayByteSink::Reset() {
size_ = appended_ = 0;
overflowed_ = FALSE;
return *this;
}
void CheckedArrayByteSink::Append(const char* bytes, int32_t n) {
if (n <= 0) {
return;
}
appended_ += n;
int32_t available = capacity_ - size_;
if (n > available) {
n = available;
overflowed_ = TRUE;
}
if (n > 0 && bytes != (outbuf_ + size_)) {
uprv_memcpy(outbuf_ + size_, bytes, n);
}
size_ += n;
}
char* CheckedArrayByteSink::GetAppendBuffer(int32_t min_capacity,
int32_t /*desired_capacity_hint*/,
char* scratch,
int32_t scratch_capacity,
int32_t* result_capacity) {
if (min_capacity < 1 || scratch_capacity < min_capacity) {
*result_capacity = 0;
return NULL;
}
int32_t available = capacity_ - size_;
if (available >= min_capacity) {
*result_capacity = available;
return outbuf_ + size_;
} else {
*result_capacity = scratch_capacity;
return scratch;
}
}
U_NAMESPACE_END

View File

@ -0,0 +1,439 @@
/*
*******************************************************************************
* Copyright (C) 2010-2011, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* file name: bytestrie.cpp
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2010sep25
* created by: Markus W. Scherer
*/
#include "unicode/utypes.h"
#include "unicode/bytestream.h"
#include "unicode/bytestrie.h"
#include "unicode/uobject.h"
#include "cmemory.h"
#include "uassert.h"
U_NAMESPACE_BEGIN
BytesTrie::~BytesTrie() {
uprv_free(ownedArray_);
}
// lead byte already shifted right by 1.
int32_t
BytesTrie::readValue(const uint8_t *pos, int32_t leadByte) {
int32_t value;
if(leadByte<kMinTwoByteValueLead) {
value=leadByte-kMinOneByteValueLead;
} else if(leadByte<kMinThreeByteValueLead) {
value=((leadByte-kMinTwoByteValueLead)<<8)|*pos;
} else if(leadByte<kFourByteValueLead) {
value=((leadByte-kMinThreeByteValueLead)<<16)|(pos[0]<<8)|pos[1];
} else if(leadByte==kFourByteValueLead) {
value=(pos[0]<<16)|(pos[1]<<8)|pos[2];
} else {
value=(pos[0]<<24)|(pos[1]<<16)|(pos[2]<<8)|pos[3];
}
return value;
}
const uint8_t *
BytesTrie::jumpByDelta(const uint8_t *pos) {
int32_t delta=*pos++;
if(delta<kMinTwoByteDeltaLead) {
// nothing to do
} else if(delta<kMinThreeByteDeltaLead) {
delta=((delta-kMinTwoByteDeltaLead)<<8)|*pos++;
} else if(delta<kFourByteDeltaLead) {
delta=((delta-kMinThreeByteDeltaLead)<<16)|(pos[0]<<8)|pos[1];
pos+=2;
} else if(delta==kFourByteDeltaLead) {
delta=(pos[0]<<16)|(pos[1]<<8)|pos[2];
pos+=3;
} else {
delta=(pos[0]<<24)|(pos[1]<<16)|(pos[2]<<8)|pos[3];
pos+=4;
}
return pos+delta;
}
UStringTrieResult
BytesTrie::current() const {
const uint8_t *pos=pos_;
if(pos==NULL) {
return USTRINGTRIE_NO_MATCH;
} else {
int32_t node;
return (remainingMatchLength_<0 && (node=*pos)>=kMinValueLead) ?
valueResult(node) : USTRINGTRIE_NO_VALUE;
}
}
UStringTrieResult
BytesTrie::branchNext(const uint8_t *pos, int32_t length, int32_t inByte) {
// Branch according to the current byte.
if(length==0) {
length=*pos++;
}
++length;
// The length of the branch is the number of bytes to select from.
// The data structure encodes a binary search.
while(length>kMaxBranchLinearSubNodeLength) {
if(inByte<*pos++) {
length>>=1;
pos=jumpByDelta(pos);
} else {
length=length-(length>>1);
pos=skipDelta(pos);
}
}
// Drop down to linear search for the last few bytes.
// length>=2 because the loop body above sees length>kMaxBranchLinearSubNodeLength>=3
// and divides length by 2.
do {
if(inByte==*pos++) {
UStringTrieResult result;
int32_t node=*pos;
U_ASSERT(node>=kMinValueLead);
if(node&kValueIsFinal) {
// Leave the final value for getValue() to read.
result=USTRINGTRIE_FINAL_VALUE;
} else {
// Use the non-final value as the jump delta.
++pos;
// int32_t delta=readValue(pos, node>>1);
node>>=1;
int32_t delta;
if(node<kMinTwoByteValueLead) {
delta=node-kMinOneByteValueLead;
} else if(node<kMinThreeByteValueLead) {
delta=((node-kMinTwoByteValueLead)<<8)|*pos++;
} else if(node<kFourByteValueLead) {
delta=((node-kMinThreeByteValueLead)<<16)|(pos[0]<<8)|pos[1];
pos+=2;
} else if(node==kFourByteValueLead) {
delta=(pos[0]<<16)|(pos[1]<<8)|pos[2];
pos+=3;
} else {
delta=(pos[0]<<24)|(pos[1]<<16)|(pos[2]<<8)|pos[3];
pos+=4;
}
// end readValue()
pos+=delta;
node=*pos;
result= node>=kMinValueLead ? valueResult(node) : USTRINGTRIE_NO_VALUE;
}
pos_=pos;
return result;
}
--length;
pos=skipValue(pos);
} while(length>1);
if(inByte==*pos++) {
pos_=pos;
int32_t node=*pos;
return node>=kMinValueLead ? valueResult(node) : USTRINGTRIE_NO_VALUE;
} else {
stop();
return USTRINGTRIE_NO_MATCH;
}
}
UStringTrieResult
BytesTrie::nextImpl(const uint8_t *pos, int32_t inByte) {
for(;;) {
int32_t node=*pos++;
if(node<kMinLinearMatch) {
return branchNext(pos, node, inByte);
} else if(node<kMinValueLead) {
// Match the first of length+1 bytes.
int32_t length=node-kMinLinearMatch; // Actual match length minus 1.
if(inByte==*pos++) {
remainingMatchLength_=--length;
pos_=pos;
return (length<0 && (node=*pos)>=kMinValueLead) ?
valueResult(node) : USTRINGTRIE_NO_VALUE;
} else {
// No match.
break;
}
} else if(node&kValueIsFinal) {
// No further matching bytes.
break;
} else {
// Skip intermediate value.
pos=skipValue(pos, node);
// The next node must not also be a value node.
U_ASSERT(*pos<kMinValueLead);
}
}
stop();
return USTRINGTRIE_NO_MATCH;
}
UStringTrieResult
BytesTrie::next(int32_t inByte) {
const uint8_t *pos=pos_;
if(pos==NULL) {
return USTRINGTRIE_NO_MATCH;
}
if(inByte<0) {
inByte+=0x100;
}
int32_t length=remainingMatchLength_; // Actual remaining match length minus 1.
if(length>=0) {
// Remaining part of a linear-match node.
if(inByte==*pos++) {
remainingMatchLength_=--length;
pos_=pos;
int32_t node;
return (length<0 && (node=*pos)>=kMinValueLead) ?
valueResult(node) : USTRINGTRIE_NO_VALUE;
} else {
stop();
return USTRINGTRIE_NO_MATCH;
}
}
return nextImpl(pos, inByte);
}
UStringTrieResult
BytesTrie::next(const char *s, int32_t sLength) {
if(sLength<0 ? *s==0 : sLength==0) {
// Empty input.
return current();
}
const uint8_t *pos=pos_;
if(pos==NULL) {
return USTRINGTRIE_NO_MATCH;
}
int32_t length=remainingMatchLength_; // Actual remaining match length minus 1.
for(;;) {
// Fetch the next input byte, if there is one.
// Continue a linear-match node without rechecking sLength<0.
int32_t inByte;
if(sLength<0) {
for(;;) {
if((inByte=*s++)==0) {
remainingMatchLength_=length;
pos_=pos;
int32_t node;
return (length<0 && (node=*pos)>=kMinValueLead) ?
valueResult(node) : USTRINGTRIE_NO_VALUE;
}
if(length<0) {
remainingMatchLength_=length;
break;
}
if(inByte!=*pos) {
stop();
return USTRINGTRIE_NO_MATCH;
}
++pos;
--length;
}
} else {
for(;;) {
if(sLength==0) {
remainingMatchLength_=length;
pos_=pos;
int32_t node;
return (length<0 && (node=*pos)>=kMinValueLead) ?
valueResult(node) : USTRINGTRIE_NO_VALUE;
}
inByte=*s++;
--sLength;
if(length<0) {
remainingMatchLength_=length;
break;
}
if(inByte!=*pos) {
stop();
return USTRINGTRIE_NO_MATCH;
}
++pos;
--length;
}
}
for(;;) {
int32_t node=*pos++;
if(node<kMinLinearMatch) {
UStringTrieResult result=branchNext(pos, node, inByte);
if(result==USTRINGTRIE_NO_MATCH) {
return USTRINGTRIE_NO_MATCH;
}
// Fetch the next input byte, if there is one.
if(sLength<0) {
if((inByte=*s++)==0) {
return result;
}
} else {
if(sLength==0) {
return result;
}
inByte=*s++;
--sLength;
}
if(result==USTRINGTRIE_FINAL_VALUE) {
// No further matching bytes.
stop();
return USTRINGTRIE_NO_MATCH;
}
pos=pos_; // branchNext() advanced pos and wrote it to pos_ .
} else if(node<kMinValueLead) {
// Match length+1 bytes.
length=node-kMinLinearMatch; // Actual match length minus 1.
if(inByte!=*pos) {
stop();
return USTRINGTRIE_NO_MATCH;
}
++pos;
--length;
break;
} else if(node&kValueIsFinal) {
// No further matching bytes.
stop();
return USTRINGTRIE_NO_MATCH;
} else {
// Skip intermediate value.
pos=skipValue(pos, node);
// The next node must not also be a value node.
U_ASSERT(*pos<kMinValueLead);
}
}
}
}
const uint8_t *
BytesTrie::findUniqueValueFromBranch(const uint8_t *pos, int32_t length,
UBool haveUniqueValue, int32_t &uniqueValue) {
while(length>kMaxBranchLinearSubNodeLength) {
++pos; // ignore the comparison byte
if(NULL==findUniqueValueFromBranch(jumpByDelta(pos), length>>1, haveUniqueValue, uniqueValue)) {
return NULL;
}
length=length-(length>>1);
pos=skipDelta(pos);
}
do {
++pos; // ignore a comparison byte
// handle its value
int32_t node=*pos++;
UBool isFinal=(UBool)(node&kValueIsFinal);
int32_t value=readValue(pos, node>>1);
pos=skipValue(pos, node);
if(isFinal) {
if(haveUniqueValue) {
if(value!=uniqueValue) {
return NULL;
}
} else {
uniqueValue=value;
haveUniqueValue=TRUE;
}
} else {
if(!findUniqueValue(pos+value, haveUniqueValue, uniqueValue)) {
return NULL;
}
haveUniqueValue=TRUE;
}
} while(--length>1);
return pos+1; // ignore the last comparison byte
}
UBool
BytesTrie::findUniqueValue(const uint8_t *pos, UBool haveUniqueValue, int32_t &uniqueValue) {
for(;;) {
int32_t node=*pos++;
if(node<kMinLinearMatch) {
if(node==0) {
node=*pos++;
}
pos=findUniqueValueFromBranch(pos, node+1, haveUniqueValue, uniqueValue);
if(pos==NULL) {
return FALSE;
}
haveUniqueValue=TRUE;
} else if(node<kMinValueLead) {
// linear-match node
pos+=node-kMinLinearMatch+1; // Ignore the match bytes.
} else {
UBool isFinal=(UBool)(node&kValueIsFinal);
int32_t value=readValue(pos, node>>1);
if(haveUniqueValue) {
if(value!=uniqueValue) {
return FALSE;
}
} else {
uniqueValue=value;
haveUniqueValue=TRUE;
}
if(isFinal) {
return TRUE;
}
pos=skipValue(pos, node);
}
}
}
int32_t
BytesTrie::getNextBytes(ByteSink &out) const {
const uint8_t *pos=pos_;
if(pos==NULL) {
return 0;
}
if(remainingMatchLength_>=0) {
append(out, *pos); // Next byte of a pending linear-match node.
return 1;
}
int32_t node=*pos++;
if(node>=kMinValueLead) {
if(node&kValueIsFinal) {
return 0;
} else {
pos=skipValue(pos, node);
node=*pos++;
U_ASSERT(node<kMinValueLead);
}
}
if(node<kMinLinearMatch) {
if(node==0) {
node=*pos++;
}
getNextBranchBytes(pos, ++node, out);
return node;
} else {
// First byte of the linear-match node.
append(out, *pos);
return 1;
}
}
void
BytesTrie::getNextBranchBytes(const uint8_t *pos, int32_t length, ByteSink &out) {
while(length>kMaxBranchLinearSubNodeLength) {
++pos; // ignore the comparison byte
getNextBranchBytes(jumpByDelta(pos), length>>1, out);
length=length-(length>>1);
pos=skipDelta(pos);
}
do {
append(out, *pos++);
pos=skipValue(pos);
} while(--length>1);
append(out, *pos);
}
void
BytesTrie::append(ByteSink &out, int c) {
char ch=(char)c;
out.Append(&ch, 1);
}
U_NAMESPACE_END

View File

@ -0,0 +1,501 @@
/*
*******************************************************************************
* Copyright (C) 2010-2012, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* file name: bytestriebuilder.cpp
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2010sep25
* created by: Markus W. Scherer
*/
#include "unicode/utypes.h"
#include "unicode/bytestrie.h"
#include "unicode/bytestriebuilder.h"
#include "unicode/stringpiece.h"
#include "charstr.h"
#include "cmemory.h"
#include "uhash.h"
#include "uarrsort.h"
#include "uassert.h"
#include "ustr_imp.h"
U_NAMESPACE_BEGIN
/*
* Note: This builder implementation stores (bytes, value) pairs with full copies
* of the byte sequences, until the BytesTrie is built.
* It might(!) take less memory if we collected the data in a temporary, dynamic trie.
*/
class BytesTrieElement : public UMemory {
public:
// Use compiler's default constructor, initializes nothing.
void setTo(const StringPiece &s, int32_t val, CharString &strings, UErrorCode &errorCode);
StringPiece getString(const CharString &strings) const {
int32_t offset=stringOffset;
int32_t length;
if(offset>=0) {
length=(uint8_t)strings[offset++];
} else {
offset=~offset;
length=((int32_t)(uint8_t)strings[offset]<<8)|(uint8_t)strings[offset+1];
offset+=2;
}
return StringPiece(strings.data()+offset, length);
}
int32_t getStringLength(const CharString &strings) const {
int32_t offset=stringOffset;
if(offset>=0) {
return (uint8_t)strings[offset];
} else {
offset=~offset;
return ((int32_t)(uint8_t)strings[offset]<<8)|(uint8_t)strings[offset+1];
}
}
char charAt(int32_t index, const CharString &strings) const { return data(strings)[index]; }
int32_t getValue() const { return value; }
int32_t compareStringTo(const BytesTrieElement &o, const CharString &strings) const;
private:
const char *data(const CharString &strings) const {
int32_t offset=stringOffset;
if(offset>=0) {
++offset;
} else {
offset=~offset+2;
}
return strings.data()+offset;
}
// If the stringOffset is non-negative, then the first strings byte contains
// the string length.
// If the stringOffset is negative, then the first two strings bytes contain
// the string length (big-endian), and the offset needs to be bit-inverted.
// (Compared with a stringLength field here, this saves 3 bytes per string for most strings.)
int32_t stringOffset;
int32_t value;
};
void
BytesTrieElement::setTo(const StringPiece &s, int32_t val,
CharString &strings, UErrorCode &errorCode) {
if(U_FAILURE(errorCode)) {
return;
}
int32_t length=s.length();
if(length>0xffff) {
// Too long: We store the length in 1 or 2 bytes.
errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
return;
}
int32_t offset=strings.length();
if(length>0xff) {
offset=~offset;
strings.append((char)(length>>8), errorCode);
}
strings.append((char)length, errorCode);
stringOffset=offset;
value=val;
strings.append(s, errorCode);
}
int32_t
BytesTrieElement::compareStringTo(const BytesTrieElement &other, const CharString &strings) const {
// TODO: add StringPiece::compare(), see ticket #8187
StringPiece thisString=getString(strings);
StringPiece otherString=other.getString(strings);
int32_t lengthDiff=thisString.length()-otherString.length();
int32_t commonLength;
if(lengthDiff<=0) {
commonLength=thisString.length();
} else {
commonLength=otherString.length();
}
int32_t diff=uprv_memcmp(thisString.data(), otherString.data(), commonLength);
return diff!=0 ? diff : lengthDiff;
}
BytesTrieBuilder::BytesTrieBuilder(UErrorCode &errorCode)
: strings(NULL), elements(NULL), elementsCapacity(0), elementsLength(0),
bytes(NULL), bytesCapacity(0), bytesLength(0) {
if(U_FAILURE(errorCode)) {
return;
}
strings=new CharString();
if(strings==NULL) {
errorCode=U_MEMORY_ALLOCATION_ERROR;
}
}
BytesTrieBuilder::~BytesTrieBuilder() {
delete strings;
delete[] elements;
uprv_free(bytes);
}
BytesTrieBuilder &
BytesTrieBuilder::add(const StringPiece &s, int32_t value, UErrorCode &errorCode) {
if(U_FAILURE(errorCode)) {
return *this;
}
if(bytesLength>0) {
// Cannot add elements after building.
errorCode=U_NO_WRITE_PERMISSION;
return *this;
}
if(elementsLength==elementsCapacity) {
int32_t newCapacity;
if(elementsCapacity==0) {
newCapacity=1024;
} else {
newCapacity=4*elementsCapacity;
}
BytesTrieElement *newElements=new BytesTrieElement[newCapacity];
if(newElements==NULL) {
errorCode=U_MEMORY_ALLOCATION_ERROR;
return *this; // error instead of dereferencing null
}
if(elementsLength>0) {
uprv_memcpy(newElements, elements, elementsLength*sizeof(BytesTrieElement));
}
delete[] elements;
elements=newElements;
elementsCapacity=newCapacity;
}
elements[elementsLength++].setTo(s, value, *strings, errorCode);
return *this;
}
U_CDECL_BEGIN
static int32_t U_CALLCONV
compareElementStrings(const void *context, const void *left, const void *right) {
const CharString *strings=static_cast<const CharString *>(context);
const BytesTrieElement *leftElement=static_cast<const BytesTrieElement *>(left);
const BytesTrieElement *rightElement=static_cast<const BytesTrieElement *>(right);
return leftElement->compareStringTo(*rightElement, *strings);
}
U_CDECL_END
BytesTrie *
BytesTrieBuilder::build(UStringTrieBuildOption buildOption, UErrorCode &errorCode) {
buildBytes(buildOption, errorCode);
BytesTrie *newTrie=NULL;
if(U_SUCCESS(errorCode)) {
newTrie=new BytesTrie(bytes, bytes+(bytesCapacity-bytesLength));
if(newTrie==NULL) {
errorCode=U_MEMORY_ALLOCATION_ERROR;
} else {
bytes=NULL; // The new trie now owns the array.
bytesCapacity=0;
}
}
return newTrie;
}
StringPiece
BytesTrieBuilder::buildStringPiece(UStringTrieBuildOption buildOption, UErrorCode &errorCode) {
buildBytes(buildOption, errorCode);
StringPiece result;
if(U_SUCCESS(errorCode)) {
result.set(bytes+(bytesCapacity-bytesLength), bytesLength);
}
return result;
}
void
BytesTrieBuilder::buildBytes(UStringTrieBuildOption buildOption, UErrorCode &errorCode) {
if(U_FAILURE(errorCode)) {
return;
}
if(bytes!=NULL && bytesLength>0) {
// Already built.
return;
}
if(bytesLength==0) {
if(elementsLength==0) {
errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
return;
}
uprv_sortArray(elements, elementsLength, (int32_t)sizeof(BytesTrieElement),
compareElementStrings, strings,
FALSE, // need not be a stable sort
&errorCode);
if(U_FAILURE(errorCode)) {
return;
}
// Duplicate strings are not allowed.
StringPiece prev=elements[0].getString(*strings);
for(int32_t i=1; i<elementsLength; ++i) {
StringPiece current=elements[i].getString(*strings);
if(prev==current) {
errorCode=U_ILLEGAL_ARGUMENT_ERROR;
return;
}
prev=current;
}
}
// Create and byte-serialize the trie for the elements.
bytesLength=0;
int32_t capacity=strings->length();
if(capacity<1024) {
capacity=1024;
}
if(bytesCapacity<capacity) {
uprv_free(bytes);
bytes=static_cast<char *>(uprv_malloc(capacity));
if(bytes==NULL) {
errorCode=U_MEMORY_ALLOCATION_ERROR;
bytesCapacity=0;
return;
}
bytesCapacity=capacity;
}
StringTrieBuilder::build(buildOption, elementsLength, errorCode);
if(bytes==NULL) {
errorCode=U_MEMORY_ALLOCATION_ERROR;
}
}
BytesTrieBuilder &
BytesTrieBuilder::clear() {
strings->clear();
elementsLength=0;
bytesLength=0;
return *this;
}
int32_t
BytesTrieBuilder::getElementStringLength(int32_t i) const {
return elements[i].getStringLength(*strings);
}
UChar
BytesTrieBuilder::getElementUnit(int32_t i, int32_t byteIndex) const {
return (uint8_t)elements[i].charAt(byteIndex, *strings);
}
int32_t
BytesTrieBuilder::getElementValue(int32_t i) const {
return elements[i].getValue();
}
int32_t
BytesTrieBuilder::getLimitOfLinearMatch(int32_t first, int32_t last, int32_t byteIndex) const {
const BytesTrieElement &firstElement=elements[first];
const BytesTrieElement &lastElement=elements[last];
int32_t minStringLength=firstElement.getStringLength(*strings);
while(++byteIndex<minStringLength &&
firstElement.charAt(byteIndex, *strings)==
lastElement.charAt(byteIndex, *strings)) {}
return byteIndex;
}
int32_t
BytesTrieBuilder::countElementUnits(int32_t start, int32_t limit, int32_t byteIndex) const {
int32_t length=0; // Number of different bytes at byteIndex.
int32_t i=start;
do {
char byte=elements[i++].charAt(byteIndex, *strings);
while(i<limit && byte==elements[i].charAt(byteIndex, *strings)) {
++i;
}
++length;
} while(i<limit);
return length;
}
int32_t
BytesTrieBuilder::skipElementsBySomeUnits(int32_t i, int32_t byteIndex, int32_t count) const {
do {
char byte=elements[i++].charAt(byteIndex, *strings);
while(byte==elements[i].charAt(byteIndex, *strings)) {
++i;
}
} while(--count>0);
return i;
}
int32_t
BytesTrieBuilder::indexOfElementWithNextUnit(int32_t i, int32_t byteIndex, UChar byte) const {
char b=(char)byte;
while(b==elements[i].charAt(byteIndex, *strings)) {
++i;
}
return i;
}
BytesTrieBuilder::BTLinearMatchNode::BTLinearMatchNode(const char *bytes, int32_t len, Node *nextNode)
: LinearMatchNode(len, nextNode), s(bytes) {
hash=hash*37+ustr_hashCharsN(bytes, len);
}
UBool
BytesTrieBuilder::BTLinearMatchNode::operator==(const Node &other) const {
if(this==&other) {
return TRUE;
}
if(!LinearMatchNode::operator==(other)) {
return FALSE;
}
const BTLinearMatchNode &o=(const BTLinearMatchNode &)other;
return 0==uprv_memcmp(s, o.s, length);
}
void
BytesTrieBuilder::BTLinearMatchNode::write(StringTrieBuilder &builder) {
BytesTrieBuilder &b=(BytesTrieBuilder &)builder;
next->write(builder);
b.write(s, length);
offset=b.write(b.getMinLinearMatch()+length-1);
}
StringTrieBuilder::Node *
BytesTrieBuilder::createLinearMatchNode(int32_t i, int32_t byteIndex, int32_t length,
Node *nextNode) const {
return new BTLinearMatchNode(
elements[i].getString(*strings).data()+byteIndex,
length,
nextNode);
}
UBool
BytesTrieBuilder::ensureCapacity(int32_t length) {
if(bytes==NULL) {
return FALSE; // previous memory allocation had failed
}
if(length>bytesCapacity) {
int32_t newCapacity=bytesCapacity;
do {
newCapacity*=2;
} while(newCapacity<=length);
char *newBytes=static_cast<char *>(uprv_malloc(newCapacity));
if(newBytes==NULL) {
// unable to allocate memory
uprv_free(bytes);
bytes=NULL;
bytesCapacity=0;
return FALSE;
}
uprv_memcpy(newBytes+(newCapacity-bytesLength),
bytes+(bytesCapacity-bytesLength), bytesLength);
uprv_free(bytes);
bytes=newBytes;
bytesCapacity=newCapacity;
}
return TRUE;
}
int32_t
BytesTrieBuilder::write(int32_t byte) {
int32_t newLength=bytesLength+1;
if(ensureCapacity(newLength)) {
bytesLength=newLength;
bytes[bytesCapacity-bytesLength]=(char)byte;
}
return bytesLength;
}
int32_t
BytesTrieBuilder::write(const char *b, int32_t length) {
int32_t newLength=bytesLength+length;
if(ensureCapacity(newLength)) {
bytesLength=newLength;
uprv_memcpy(bytes+(bytesCapacity-bytesLength), b, length);
}
return bytesLength;
}
int32_t
BytesTrieBuilder::writeElementUnits(int32_t i, int32_t byteIndex, int32_t length) {
return write(elements[i].getString(*strings).data()+byteIndex, length);
}
int32_t
BytesTrieBuilder::writeValueAndFinal(int32_t i, UBool isFinal) {
if(0<=i && i<=BytesTrie::kMaxOneByteValue) {
return write(((BytesTrie::kMinOneByteValueLead+i)<<1)|isFinal);
}
char intBytes[5];
int32_t length=1;
if(i<0 || i>0xffffff) {
intBytes[0]=(char)BytesTrie::kFiveByteValueLead;
intBytes[1]=(char)((uint32_t)i>>24);
intBytes[2]=(char)((uint32_t)i>>16);
intBytes[3]=(char)((uint32_t)i>>8);
intBytes[4]=(char)i;
length=5;
// } else if(i<=BytesTrie::kMaxOneByteValue) {
// intBytes[0]=(char)(BytesTrie::kMinOneByteValueLead+i);
} else {
if(i<=BytesTrie::kMaxTwoByteValue) {
intBytes[0]=(char)(BytesTrie::kMinTwoByteValueLead+(i>>8));
} else {
if(i<=BytesTrie::kMaxThreeByteValue) {
intBytes[0]=(char)(BytesTrie::kMinThreeByteValueLead+(i>>16));
} else {
intBytes[0]=(char)BytesTrie::kFourByteValueLead;
intBytes[1]=(char)(i>>16);
length=2;
}
intBytes[length++]=(char)(i>>8);
}
intBytes[length++]=(char)i;
}
intBytes[0]=(char)((intBytes[0]<<1)|isFinal);
return write(intBytes, length);
}
int32_t
BytesTrieBuilder::writeValueAndType(UBool hasValue, int32_t value, int32_t node) {
int32_t offset=write(node);
if(hasValue) {
offset=writeValueAndFinal(value, FALSE);
}
return offset;
}
int32_t
BytesTrieBuilder::writeDeltaTo(int32_t jumpTarget) {
int32_t i=bytesLength-jumpTarget;
U_ASSERT(i>=0);
if(i<=BytesTrie::kMaxOneByteDelta) {
return write(i);
}
char intBytes[5];
int32_t length;
if(i<=BytesTrie::kMaxTwoByteDelta) {
intBytes[0]=(char)(BytesTrie::kMinTwoByteDeltaLead+(i>>8));
length=1;
} else {
if(i<=BytesTrie::kMaxThreeByteDelta) {
intBytes[0]=(char)(BytesTrie::kMinThreeByteDeltaLead+(i>>16));
length=2;
} else {
if(i<=0xffffff) {
intBytes[0]=(char)BytesTrie::kFourByteDeltaLead;
length=3;
} else {
intBytes[0]=(char)BytesTrie::kFiveByteDeltaLead;
intBytes[1]=(char)(i>>24);
length=4;
}
intBytes[1]=(char)(i>>16);
}
intBytes[1]=(char)(i>>8);
}
intBytes[length++]=(char)i;
return write(intBytes, length);
}
U_NAMESPACE_END

View File

@ -0,0 +1,210 @@
/*
*******************************************************************************
* Copyright (C) 2010-2012, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* file name: bytestrieiterator.cpp
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2010nov03
* created by: Markus W. Scherer
*/
#include "unicode/utypes.h"
#include "unicode/bytestrie.h"
#include "unicode/stringpiece.h"
#include "charstr.h"
#include "uvectr32.h"
U_NAMESPACE_BEGIN
BytesTrie::Iterator::Iterator(const void *trieBytes, int32_t maxStringLength,
UErrorCode &errorCode)
: bytes_(static_cast<const uint8_t *>(trieBytes)),
pos_(bytes_), initialPos_(bytes_),
remainingMatchLength_(-1), initialRemainingMatchLength_(-1),
str_(NULL), maxLength_(maxStringLength), value_(0), stack_(NULL) {
if(U_FAILURE(errorCode)) {
return;
}
// str_ and stack_ are pointers so that it's easy to turn bytestrie.h into
// a public API header for which we would want it to depend only on
// other public headers.
// Unlike BytesTrie itself, its Iterator performs memory allocations anyway
// via the CharString and UVector32 implementations, so this additional
// cost is minimal.
str_=new CharString();
stack_=new UVector32(errorCode);
if(U_SUCCESS(errorCode) && (str_==NULL || stack_==NULL)) {
errorCode=U_MEMORY_ALLOCATION_ERROR;
}
}
BytesTrie::Iterator::Iterator(const BytesTrie &trie, int32_t maxStringLength,
UErrorCode &errorCode)
: bytes_(trie.bytes_), pos_(trie.pos_), initialPos_(trie.pos_),
remainingMatchLength_(trie.remainingMatchLength_),
initialRemainingMatchLength_(trie.remainingMatchLength_),
str_(NULL), maxLength_(maxStringLength), value_(0), stack_(NULL) {
if(U_FAILURE(errorCode)) {
return;
}
str_=new CharString();
stack_=new UVector32(errorCode);
if(U_FAILURE(errorCode)) {
return;
}
if(str_==NULL || stack_==NULL) {
errorCode=U_MEMORY_ALLOCATION_ERROR;
return;
}
int32_t length=remainingMatchLength_; // Actual remaining match length minus 1.
if(length>=0) {
// Pending linear-match node, append remaining bytes to str_.
++length;
if(maxLength_>0 && length>maxLength_) {
length=maxLength_; // This will leave remainingMatchLength>=0 as a signal.
}
str_->append(reinterpret_cast<const char *>(pos_), length, errorCode);
pos_+=length;
remainingMatchLength_-=length;
}
}
BytesTrie::Iterator::~Iterator() {
delete str_;
delete stack_;
}
BytesTrie::Iterator &
BytesTrie::Iterator::reset() {
pos_=initialPos_;
remainingMatchLength_=initialRemainingMatchLength_;
int32_t length=remainingMatchLength_+1; // Remaining match length.
if(maxLength_>0 && length>maxLength_) {
length=maxLength_;
}
str_->truncate(length);
pos_+=length;
remainingMatchLength_-=length;
stack_->setSize(0);
return *this;
}
UBool
BytesTrie::Iterator::hasNext() const { return pos_!=NULL || !stack_->isEmpty(); }
UBool
BytesTrie::Iterator::next(UErrorCode &errorCode) {
if(U_FAILURE(errorCode)) {
return FALSE;
}
const uint8_t *pos=pos_;
if(pos==NULL) {
if(stack_->isEmpty()) {
return FALSE;
}
// Pop the state off the stack and continue with the next outbound edge of
// the branch node.
int32_t stackSize=stack_->size();
int32_t length=stack_->elementAti(stackSize-1);
pos=bytes_+stack_->elementAti(stackSize-2);
stack_->setSize(stackSize-2);
str_->truncate(length&0xffff);
length=(int32_t)((uint32_t)length>>16);
if(length>1) {
pos=branchNext(pos, length, errorCode);
if(pos==NULL) {
return TRUE; // Reached a final value.
}
} else {
str_->append((char)*pos++, errorCode);
}
}
if(remainingMatchLength_>=0) {
// We only get here if we started in a pending linear-match node
// with more than maxLength remaining bytes.
return truncateAndStop();
}
for(;;) {
int32_t node=*pos++;
if(node>=kMinValueLead) {
// Deliver value for the byte sequence so far.
UBool isFinal=(UBool)(node&kValueIsFinal);
value_=readValue(pos, node>>1);
if(isFinal || (maxLength_>0 && str_->length()==maxLength_)) {
pos_=NULL;
} else {
pos_=skipValue(pos, node);
}
sp_.set(str_->data(), str_->length());
return TRUE;
}
if(maxLength_>0 && str_->length()==maxLength_) {
return truncateAndStop();
}
if(node<kMinLinearMatch) {
if(node==0) {
node=*pos++;
}
pos=branchNext(pos, node+1, errorCode);
if(pos==NULL) {
return TRUE; // Reached a final value.
}
} else {
// Linear-match node, append length bytes to str_.
int32_t length=node-kMinLinearMatch+1;
if(maxLength_>0 && str_->length()+length>maxLength_) {
str_->append(reinterpret_cast<const char *>(pos),
maxLength_-str_->length(), errorCode);
return truncateAndStop();
}
str_->append(reinterpret_cast<const char *>(pos), length, errorCode);
pos+=length;
}
}
}
UBool
BytesTrie::Iterator::truncateAndStop() {
pos_=NULL;
sp_.set(str_->data(), str_->length());
value_=-1; // no real value for str
return TRUE;
}
// Branch node, needs to take the first outbound edge and push state for the rest.
const uint8_t *
BytesTrie::Iterator::branchNext(const uint8_t *pos, int32_t length, UErrorCode &errorCode) {
while(length>kMaxBranchLinearSubNodeLength) {
++pos; // ignore the comparison byte
// Push state for the greater-or-equal edge.
stack_->addElement((int32_t)(skipDelta(pos)-bytes_), errorCode);
stack_->addElement(((length-(length>>1))<<16)|str_->length(), errorCode);
// Follow the less-than edge.
length>>=1;
pos=jumpByDelta(pos);
}
// List of key-value pairs where values are either final values or jump deltas.
// Read the first (key, value) pair.
uint8_t trieByte=*pos++;
int32_t node=*pos++;
UBool isFinal=(UBool)(node&kValueIsFinal);
int32_t value=readValue(pos, node>>1);
pos=skipValue(pos, node);
stack_->addElement((int32_t)(pos-bytes_), errorCode);
stack_->addElement(((length-1)<<16)|str_->length(), errorCode);
str_->append((char)trieByte, errorCode);
if(isFinal) {
pos_=NULL;
sp_.set(str_->data(), str_->length());
value_=value;
return NULL;
} else {
return pos+value;
}
}
U_NAMESPACE_END

View File

@ -0,0 +1,584 @@
/*
*****************************************************************************
* Copyright (C) 1996-2015, International Business Machines Corporation and
* others. All Rights Reserved.
*****************************************************************************
*/
#include "unicode/utypes.h"
#if !UCONFIG_NO_NORMALIZATION
#include "unicode/caniter.h"
#include "unicode/normalizer2.h"
#include "unicode/uchar.h"
#include "unicode/uniset.h"
#include "unicode/usetiter.h"
#include "unicode/ustring.h"
#include "unicode/utf16.h"
#include "cmemory.h"
#include "hash.h"
#include "normalizer2impl.h"
/**
* This class allows one to iterate through all the strings that are canonically equivalent to a given
* string. For example, here are some sample results:
Results for: {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}
1: \u0041\u030A\u0064\u0307\u0327
= {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}
2: \u0041\u030A\u0064\u0327\u0307
= {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE}
3: \u0041\u030A\u1E0B\u0327
= {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA}
4: \u0041\u030A\u1E11\u0307
= {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE}
5: \u00C5\u0064\u0307\u0327
= {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}
6: \u00C5\u0064\u0327\u0307
= {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE}
7: \u00C5\u1E0B\u0327
= {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA}
8: \u00C5\u1E11\u0307
= {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE}
9: \u212B\u0064\u0307\u0327
= {ANGSTROM SIGN}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}
10: \u212B\u0064\u0327\u0307
= {ANGSTROM SIGN}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE}
11: \u212B\u1E0B\u0327
= {ANGSTROM SIGN}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA}
12: \u212B\u1E11\u0307
= {ANGSTROM SIGN}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE}
*<br>Note: the code is intended for use with small strings, and is not suitable for larger ones,
* since it has not been optimized for that situation.
*@author M. Davis
*@draft
*/
// public
U_NAMESPACE_BEGIN
// TODO: add boilerplate methods.
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CanonicalIterator)
/**
*@param source string to get results for
*/
CanonicalIterator::CanonicalIterator(const UnicodeString &sourceStr, UErrorCode &status) :
pieces(NULL),
pieces_length(0),
pieces_lengths(NULL),
current(NULL),
current_length(0),
nfd(*Normalizer2::getNFDInstance(status)),
nfcImpl(*Normalizer2Factory::getNFCImpl(status))
{
if(U_SUCCESS(status) && nfcImpl.ensureCanonIterData(status)) {
setSource(sourceStr, status);
}
}
CanonicalIterator::~CanonicalIterator() {
cleanPieces();
}
void CanonicalIterator::cleanPieces() {
int32_t i = 0;
if(pieces != NULL) {
for(i = 0; i < pieces_length; i++) {
if(pieces[i] != NULL) {
delete[] pieces[i];
}
}
uprv_free(pieces);
pieces = NULL;
pieces_length = 0;
}
if(pieces_lengths != NULL) {
uprv_free(pieces_lengths);
pieces_lengths = NULL;
}
if(current != NULL) {
uprv_free(current);
current = NULL;
current_length = 0;
}
}
/**
*@return gets the source: NOTE: it is the NFD form of source
*/
UnicodeString CanonicalIterator::getSource() {
return source;
}
/**
* Resets the iterator so that one can start again from the beginning.
*/
void CanonicalIterator::reset() {
done = FALSE;
for (int i = 0; i < current_length; ++i) {
current[i] = 0;
}
}
/**
*@return the next string that is canonically equivalent. The value null is returned when
* the iteration is done.
*/
UnicodeString CanonicalIterator::next() {
int32_t i = 0;
if (done) {
buffer.setToBogus();
return buffer;
}
// delete old contents
buffer.remove();
// construct return value
for (i = 0; i < pieces_length; ++i) {
buffer.append(pieces[i][current[i]]);
}
//String result = buffer.toString(); // not needed
// find next value for next time
for (i = current_length - 1; ; --i) {
if (i < 0) {
done = TRUE;
break;
}
current[i]++;
if (current[i] < pieces_lengths[i]) break; // got sequence
current[i] = 0;
}
return buffer;
}
/**
*@param set the source string to iterate against. This allows the same iterator to be used
* while changing the source string, saving object creation.
*/
void CanonicalIterator::setSource(const UnicodeString &newSource, UErrorCode &status) {
int32_t list_length = 0;
UChar32 cp = 0;
int32_t start = 0;
int32_t i = 0;
UnicodeString *list = NULL;
nfd.normalize(newSource, source, status);
if(U_FAILURE(status)) {
return;
}
done = FALSE;
cleanPieces();
// catch degenerate case
if (newSource.length() == 0) {
pieces = (UnicodeString **)uprv_malloc(sizeof(UnicodeString *));
pieces_lengths = (int32_t*)uprv_malloc(1 * sizeof(int32_t));
pieces_length = 1;
current = (int32_t*)uprv_malloc(1 * sizeof(int32_t));
current_length = 1;
if (pieces == NULL || pieces_lengths == NULL || current == NULL) {
status = U_MEMORY_ALLOCATION_ERROR;
goto CleanPartialInitialization;
}
current[0] = 0;
pieces[0] = new UnicodeString[1];
pieces_lengths[0] = 1;
if (pieces[0] == 0) {
status = U_MEMORY_ALLOCATION_ERROR;
goto CleanPartialInitialization;
}
return;
}
list = new UnicodeString[source.length()];
if (list == 0) {
status = U_MEMORY_ALLOCATION_ERROR;
goto CleanPartialInitialization;
}
// i should initialy be the number of code units at the
// start of the string
i = U16_LENGTH(source.char32At(0));
//int32_t i = 1;
// find the segments
// This code iterates through the source string and
// extracts segments that end up on a codepoint that
// doesn't start any decompositions. (Analysis is done
// on the NFD form - see above).
for (; i < source.length(); i += U16_LENGTH(cp)) {
cp = source.char32At(i);
if (nfcImpl.isCanonSegmentStarter(cp)) {
source.extract(start, i-start, list[list_length++]); // add up to i
start = i;
}
}
source.extract(start, i-start, list[list_length++]); // add last one
// allocate the arrays, and find the strings that are CE to each segment
pieces = (UnicodeString **)uprv_malloc(list_length * sizeof(UnicodeString *));
pieces_length = list_length;
pieces_lengths = (int32_t*)uprv_malloc(list_length * sizeof(int32_t));
current = (int32_t*)uprv_malloc(list_length * sizeof(int32_t));
current_length = list_length;
if (pieces == NULL || pieces_lengths == NULL || current == NULL) {
status = U_MEMORY_ALLOCATION_ERROR;
goto CleanPartialInitialization;
}
for (i = 0; i < current_length; i++) {
current[i] = 0;
}
// for each segment, get all the combinations that can produce
// it after NFD normalization
for (i = 0; i < pieces_length; ++i) {
//if (PROGRESS) printf("SEGMENT\n");
pieces[i] = getEquivalents(list[i], pieces_lengths[i], status);
}
delete[] list;
return;
// Common section to cleanup all local variables and reset object variables.
CleanPartialInitialization:
if (list != NULL) {
delete[] list;
}
cleanPieces();
}
/**
* Dumb recursive implementation of permutation.
* TODO: optimize
* @param source the string to find permutations for
* @return the results in a set.
*/
void U_EXPORT2 CanonicalIterator::permute(UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status) {
if(U_FAILURE(status)) {
return;
}
//if (PROGRESS) printf("Permute: %s\n", UToS(Tr(source)));
int32_t i = 0;
// optimization:
// if zero or one character, just return a set with it
// we check for length < 2 to keep from counting code points all the time
if (source.length() <= 2 && source.countChar32() <= 1) {
UnicodeString *toPut = new UnicodeString(source);
/* test for NULL */
if (toPut == 0) {
status = U_MEMORY_ALLOCATION_ERROR;
return;
}
result->put(source, toPut, status);
return;
}
// otherwise iterate through the string, and recursively permute all the other characters
UChar32 cp;
Hashtable subpermute(status);
if(U_FAILURE(status)) {
return;
}
subpermute.setValueDeleter(uprv_deleteUObject);
for (i = 0; i < source.length(); i += U16_LENGTH(cp)) {
cp = source.char32At(i);
const UHashElement *ne = NULL;
int32_t el = UHASH_FIRST;
UnicodeString subPermuteString = source;
// optimization:
// if the character is canonical combining class zero,
// don't permute it
if (skipZeros && i != 0 && u_getCombiningClass(cp) == 0) {
//System.out.println("Skipping " + Utility.hex(UTF16.valueOf(source, i)));
continue;
}
subpermute.removeAll();
// see what the permutations of the characters before and after this one are
//Hashtable *subpermute = permute(source.substring(0,i) + source.substring(i + UTF16.getCharCount(cp)));
permute(subPermuteString.replace(i, U16_LENGTH(cp), NULL, 0), skipZeros, &subpermute, status);
/* Test for buffer overflows */
if(U_FAILURE(status)) {
return;
}
// The upper replace is destructive. The question is do we have to make a copy, or we don't care about the contents
// of source at this point.
// prefix this character to all of them
ne = subpermute.nextElement(el);
while (ne != NULL) {
UnicodeString *permRes = (UnicodeString *)(ne->value.pointer);
UnicodeString *chStr = new UnicodeString(cp);
//test for NULL
if (chStr == NULL) {
status = U_MEMORY_ALLOCATION_ERROR;
return;
}
chStr->append(*permRes); //*((UnicodeString *)(ne->value.pointer));
//if (PROGRESS) printf(" Piece: %s\n", UToS(*chStr));
result->put(*chStr, chStr, status);
ne = subpermute.nextElement(el);
}
}
//return result;
}
// privates
// we have a segment, in NFD. Find all the strings that are canonically equivalent to it.
UnicodeString* CanonicalIterator::getEquivalents(const UnicodeString &segment, int32_t &result_len, UErrorCode &status) {
Hashtable result(status);
Hashtable permutations(status);
Hashtable basic(status);
if (U_FAILURE(status)) {
return 0;
}
result.setValueDeleter(uprv_deleteUObject);
permutations.setValueDeleter(uprv_deleteUObject);
basic.setValueDeleter(uprv_deleteUObject);
UChar USeg[256];
int32_t segLen = segment.extract(USeg, 256, status);
getEquivalents2(&basic, USeg, segLen, status);
// now get all the permutations
// add only the ones that are canonically equivalent
// TODO: optimize by not permuting any class zero.
const UHashElement *ne = NULL;
int32_t el = UHASH_FIRST;
//Iterator it = basic.iterator();
ne = basic.nextElement(el);
//while (it.hasNext())
while (ne != NULL) {
//String item = (String) it.next();
UnicodeString item = *((UnicodeString *)(ne->value.pointer));
permutations.removeAll();
permute(item, CANITER_SKIP_ZEROES, &permutations, status);
const UHashElement *ne2 = NULL;
int32_t el2 = UHASH_FIRST;
//Iterator it2 = permutations.iterator();
ne2 = permutations.nextElement(el2);
//while (it2.hasNext())
while (ne2 != NULL) {
//String possible = (String) it2.next();
//UnicodeString *possible = new UnicodeString(*((UnicodeString *)(ne2->value.pointer)));
UnicodeString possible(*((UnicodeString *)(ne2->value.pointer)));
UnicodeString attempt;
nfd.normalize(possible, attempt, status);
// TODO: check if operator == is semanticaly the same as attempt.equals(segment)
if (attempt==segment) {
//if (PROGRESS) printf("Adding Permutation: %s\n", UToS(Tr(*possible)));
// TODO: use the hashtable just to catch duplicates - store strings directly (somehow).
result.put(possible, new UnicodeString(possible), status); //add(possible);
} else {
//if (PROGRESS) printf("-Skipping Permutation: %s\n", UToS(Tr(*possible)));
}
ne2 = permutations.nextElement(el2);
}
ne = basic.nextElement(el);
}
/* Test for buffer overflows */
if(U_FAILURE(status)) {
return 0;
}
// convert into a String[] to clean up storage
//String[] finalResult = new String[result.size()];
UnicodeString *finalResult = NULL;
int32_t resultCount;
if((resultCount = result.count())) {
finalResult = new UnicodeString[resultCount];
if (finalResult == 0) {
status = U_MEMORY_ALLOCATION_ERROR;
return NULL;
}
}
else {
status = U_ILLEGAL_ARGUMENT_ERROR;
return NULL;
}
//result.toArray(finalResult);
result_len = 0;
el = UHASH_FIRST;
ne = result.nextElement(el);
while(ne != NULL) {
finalResult[result_len++] = *((UnicodeString *)(ne->value.pointer));
ne = result.nextElement(el);
}
return finalResult;
}
Hashtable *CanonicalIterator::getEquivalents2(Hashtable *fillinResult, const UChar *segment, int32_t segLen, UErrorCode &status) {
if (U_FAILURE(status)) {
return NULL;
}
//if (PROGRESS) printf("Adding: %s\n", UToS(Tr(segment)));
UnicodeString toPut(segment, segLen);
fillinResult->put(toPut, new UnicodeString(toPut), status);
UnicodeSet starts;
// cycle through all the characters
UChar32 cp;
for (int32_t i = 0; i < segLen; i += U16_LENGTH(cp)) {
// see if any character is at the start of some decomposition
U16_GET(segment, 0, i, segLen, cp);
if (!nfcImpl.getCanonStartSet(cp, starts)) {
continue;
}
// if so, see which decompositions match
UnicodeSetIterator iter(starts);
while (iter.next()) {
UChar32 cp2 = iter.getCodepoint();
Hashtable remainder(status);
remainder.setValueDeleter(uprv_deleteUObject);
if (extract(&remainder, cp2, segment, segLen, i, status) == NULL) {
continue;
}
// there were some matches, so add all the possibilities to the set.
UnicodeString prefix(segment, i);
prefix += cp2;
int32_t el = UHASH_FIRST;
const UHashElement *ne = remainder.nextElement(el);
while (ne != NULL) {
UnicodeString item = *((UnicodeString *)(ne->value.pointer));
UnicodeString *toAdd = new UnicodeString(prefix);
/* test for NULL */
if (toAdd == 0) {
status = U_MEMORY_ALLOCATION_ERROR;
return NULL;
}
*toAdd += item;
fillinResult->put(*toAdd, toAdd, status);
//if (PROGRESS) printf("Adding: %s\n", UToS(Tr(*toAdd)));
ne = remainder.nextElement(el);
}
}
}
/* Test for buffer overflows */
if(U_FAILURE(status)) {
return NULL;
}
return fillinResult;
}
/**
* See if the decomposition of cp2 is at segment starting at segmentPos
* (with canonical rearrangment!)
* If so, take the remainder, and return the equivalents
*/
Hashtable *CanonicalIterator::extract(Hashtable *fillinResult, UChar32 comp, const UChar *segment, int32_t segLen, int32_t segmentPos, UErrorCode &status) {
//Hashtable *CanonicalIterator::extract(UChar32 comp, const UnicodeString &segment, int32_t segLen, int32_t segmentPos, UErrorCode &status) {
//if (PROGRESS) printf(" extract: %s, ", UToS(Tr(UnicodeString(comp))));
//if (PROGRESS) printf("%s, %i\n", UToS(Tr(segment)), segmentPos);
if (U_FAILURE(status)) {
return NULL;
}
UnicodeString temp(comp);
int32_t inputLen=temp.length();
UnicodeString decompString;
nfd.normalize(temp, decompString, status);
if (U_FAILURE(status)) {
return NULL;
}
if (decompString.isBogus()) {
status = U_MEMORY_ALLOCATION_ERROR;
return NULL;
}
const UChar *decomp=decompString.getBuffer();
int32_t decompLen=decompString.length();
// See if it matches the start of segment (at segmentPos)
UBool ok = FALSE;
UChar32 cp;
int32_t decompPos = 0;
UChar32 decompCp;
U16_NEXT(decomp, decompPos, decompLen, decompCp);
int32_t i = segmentPos;
while(i < segLen) {
U16_NEXT(segment, i, segLen, cp);
if (cp == decompCp) { // if equal, eat another cp from decomp
//if (PROGRESS) printf(" matches: %s\n", UToS(Tr(UnicodeString(cp))));
if (decompPos == decompLen) { // done, have all decomp characters!
temp.append(segment+i, segLen-i);
ok = TRUE;
break;
}
U16_NEXT(decomp, decompPos, decompLen, decompCp);
} else {
//if (PROGRESS) printf(" buffer: %s\n", UToS(Tr(UnicodeString(cp))));
// brute force approach
temp.append(cp);
/* TODO: optimize
// since we know that the classes are monotonically increasing, after zero
// e.g. 0 5 7 9 0 3
// we can do an optimization
// there are only a few cases that work: zero, less, same, greater
// if both classes are the same, we fail
// if the decomp class < the segment class, we fail
segClass = getClass(cp);
if (decompClass <= segClass) return null;
*/
}
}
if (!ok)
return NULL; // we failed, characters left over
//if (PROGRESS) printf("Matches\n");
if (inputLen == temp.length()) {
fillinResult->put(UnicodeString(), new UnicodeString(), status);
return fillinResult; // succeed, but no remainder
}
// brute force approach
// check to make sure result is canonically equivalent
UnicodeString trial;
nfd.normalize(temp, trial, status);
if(U_FAILURE(status) || trial.compare(segment+segmentPos, segLen - segmentPos) != 0) {
return NULL;
}
return getEquivalents2(fillinResult, temp.getBuffer()+inputLen, temp.length()-inputLen, status);
}
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_NORMALIZATION */

View File

@ -0,0 +1,98 @@
/*
**********************************************************************
* Copyright (C) 1999-2011, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*/
#include "unicode/chariter.h"
U_NAMESPACE_BEGIN
ForwardCharacterIterator::~ForwardCharacterIterator() {}
ForwardCharacterIterator::ForwardCharacterIterator()
: UObject()
{}
ForwardCharacterIterator::ForwardCharacterIterator(const ForwardCharacterIterator &other)
: UObject(other)
{}
CharacterIterator::CharacterIterator()
: textLength(0), pos(0), begin(0), end(0) {
}
CharacterIterator::CharacterIterator(int32_t length)
: textLength(length), pos(0), begin(0), end(length) {
if(textLength < 0) {
textLength = end = 0;
}
}
CharacterIterator::CharacterIterator(int32_t length, int32_t position)
: textLength(length), pos(position), begin(0), end(length) {
if(textLength < 0) {
textLength = end = 0;
}
if(pos < 0) {
pos = 0;
} else if(pos > end) {
pos = end;
}
}
CharacterIterator::CharacterIterator(int32_t length, int32_t textBegin, int32_t textEnd, int32_t position)
: textLength(length), pos(position), begin(textBegin), end(textEnd) {
if(textLength < 0) {
textLength = 0;
}
if(begin < 0) {
begin = 0;
} else if(begin > textLength) {
begin = textLength;
}
if(end < begin) {
end = begin;
} else if(end > textLength) {
end = textLength;
}
if(pos < begin) {
pos = begin;
} else if(pos > end) {
pos = end;
}
}
CharacterIterator::~CharacterIterator() {}
CharacterIterator::CharacterIterator(const CharacterIterator &that) :
ForwardCharacterIterator(that),
textLength(that.textLength), pos(that.pos), begin(that.begin), end(that.end)
{
}
CharacterIterator &
CharacterIterator::operator=(const CharacterIterator &that) {
ForwardCharacterIterator::operator=(that);
textLength = that.textLength;
pos = that.pos;
begin = that.begin;
end = that.end;
return *this;
}
// implementing first[32]PostInc() directly in a subclass should be faster
// but these implementations make subclassing a little easier
UChar
CharacterIterator::firstPostInc(void) {
setToStart();
return nextPostInc();
}
UChar32
CharacterIterator::first32PostInc(void) {
setToStart();
return next32PostInc();
}
U_NAMESPACE_END

View File

@ -0,0 +1,171 @@
/*
*******************************************************************************
* Copyright (C) 2010-2015, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* file name: charstr.cpp
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2010may19
* created by: Markus W. Scherer
*/
#include "unicode/utypes.h"
#include "charstr.h"
#include "cmemory.h"
#include "cstring.h"
#include "uinvchar.h"
U_NAMESPACE_BEGIN
CharString &CharString::copyFrom(const CharString &s, UErrorCode &errorCode) {
if(U_SUCCESS(errorCode) && this!=&s && ensureCapacity(s.len+1, 0, errorCode)) {
len=s.len;
uprv_memcpy(buffer.getAlias(), s.buffer.getAlias(), len+1);
}
return *this;
}
int32_t CharString::lastIndexOf(char c) const {
for(int32_t i=len; i>0;) {
if(buffer[--i]==c) {
return i;
}
}
return -1;
}
CharString &CharString::truncate(int32_t newLength) {
if(newLength<0) {
newLength=0;
}
if(newLength<len) {
buffer[len=newLength]=0;
}
return *this;
}
CharString &CharString::append(char c, UErrorCode &errorCode) {
if(ensureCapacity(len+2, 0, errorCode)) {
buffer[len++]=c;
buffer[len]=0;
}
return *this;
}
CharString &CharString::append(const char *s, int32_t sLength, UErrorCode &errorCode) {
if(U_FAILURE(errorCode)) {
return *this;
}
if(sLength<-1 || (s==NULL && sLength!=0)) {
errorCode=U_ILLEGAL_ARGUMENT_ERROR;
return *this;
}
if(sLength<0) {
sLength=uprv_strlen(s);
}
if(sLength>0) {
if(s==(buffer.getAlias()+len)) {
// The caller wrote into the getAppendBuffer().
if(sLength>=(buffer.getCapacity()-len)) {
// The caller wrote too much.
errorCode=U_INTERNAL_PROGRAM_ERROR;
} else {
buffer[len+=sLength]=0;
}
} else if(buffer.getAlias()<=s && s<(buffer.getAlias()+len) &&
sLength>=(buffer.getCapacity()-len)
) {
// (Part of) this string is appended to itself which requires reallocation,
// so we have to make a copy of the substring and append that.
return append(CharString(s, sLength, errorCode), errorCode);
} else if(ensureCapacity(len+sLength+1, 0, errorCode)) {
uprv_memcpy(buffer.getAlias()+len, s, sLength);
buffer[len+=sLength]=0;
}
}
return *this;
}
char *CharString::getAppendBuffer(int32_t minCapacity,
int32_t desiredCapacityHint,
int32_t &resultCapacity,
UErrorCode &errorCode) {
if(U_FAILURE(errorCode)) {
resultCapacity=0;
return NULL;
}
int32_t appendCapacity=buffer.getCapacity()-len-1; // -1 for NUL
if(appendCapacity>=minCapacity) {
resultCapacity=appendCapacity;
return buffer.getAlias()+len;
}
if(ensureCapacity(len+minCapacity+1, len+desiredCapacityHint+1, errorCode)) {
resultCapacity=buffer.getCapacity()-len-1;
return buffer.getAlias()+len;
}
resultCapacity=0;
return NULL;
}
CharString &CharString::appendInvariantChars(const UnicodeString &s, UErrorCode &errorCode) {
if(U_FAILURE(errorCode)) {
return *this;
}
if (!uprv_isInvariantUnicodeString(s)) {
errorCode = U_INVARIANT_CONVERSION_ERROR;
return *this;
}
if(ensureCapacity(len+s.length()+1, 0, errorCode)) {
len+=s.extract(0, 0x7fffffff, buffer.getAlias()+len, buffer.getCapacity()-len, US_INV);
}
return *this;
}
UBool CharString::ensureCapacity(int32_t capacity,
int32_t desiredCapacityHint,
UErrorCode &errorCode) {
if(U_FAILURE(errorCode)) {
return FALSE;
}
if(capacity>buffer.getCapacity()) {
if(desiredCapacityHint==0) {
desiredCapacityHint=capacity+buffer.getCapacity();
}
if( (desiredCapacityHint<=capacity || buffer.resize(desiredCapacityHint, len+1)==NULL) &&
buffer.resize(capacity, len+1)==NULL
) {
errorCode=U_MEMORY_ALLOCATION_ERROR;
return FALSE;
}
}
return TRUE;
}
CharString &CharString::appendPathPart(const StringPiece &s, UErrorCode &errorCode) {
if(U_FAILURE(errorCode)) {
return *this;
}
if(s.length()==0) {
return *this;
}
char c;
if(len>0 && (c=buffer[len-1])!=U_FILE_SEP_CHAR && c!=U_FILE_ALT_SEP_CHAR) {
append(U_FILE_SEP_CHAR, errorCode);
}
append(s, errorCode);
return *this;
}
CharString &CharString::ensureEndsWithFileSeparator(UErrorCode &errorCode) {
char c;
if(U_SUCCESS(errorCode) && len>0 &&
(c=buffer[len-1])!=U_FILE_SEP_CHAR && c!=U_FILE_ALT_SEP_CHAR) {
append(U_FILE_SEP_CHAR, errorCode);
}
return *this;
}
U_NAMESPACE_END

View File

@ -0,0 +1,139 @@
/*
**********************************************************************
* Copyright (c) 2001-2015, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* Date Name Description
* 11/19/2001 aliu Creation.
* 05/19/2010 markus Rewritten from scratch
**********************************************************************
*/
#ifndef CHARSTRING_H
#define CHARSTRING_H
#include "unicode/utypes.h"
#include "unicode/unistr.h"
#include "unicode/uobject.h"
#include "cmemory.h"
U_NAMESPACE_BEGIN
// Windows needs us to DLL-export the MaybeStackArray template specialization,
// but MacOS X cannot handle it. Same as in digitlst.h.
#if !U_PLATFORM_IS_DARWIN_BASED
template class U_COMMON_API MaybeStackArray<char, 40>;
#endif
/**
* ICU-internal char * string class.
* This class does not assume or enforce any particular character encoding.
* Raw bytes can be stored. The string object owns its characters.
* A terminating NUL is stored, but the class does not prevent embedded NUL characters.
*
* This class wants to be convenient but is also deliberately minimalist.
* Please do not add methods if they only add minor convenience.
* For example:
* cs.data()[5]='a'; // no need for setCharAt(5, 'a')
*/
class U_COMMON_API CharString : public UMemory {
public:
CharString() : len(0) { buffer[0]=0; }
CharString(const StringPiece &s, UErrorCode &errorCode) : len(0) {
buffer[0]=0;
append(s, errorCode);
}
CharString(const CharString &s, UErrorCode &errorCode) : len(0) {
buffer[0]=0;
append(s, errorCode);
}
CharString(const char *s, int32_t sLength, UErrorCode &errorCode) : len(0) {
buffer[0]=0;
append(s, sLength, errorCode);
}
~CharString() {}
/**
* Replaces this string's contents with the other string's contents.
* CharString does not support the standard copy constructor nor
* the assignment operator, to make copies explicit and to
* use a UErrorCode where memory allocations might be needed.
*/
CharString &copyFrom(const CharString &other, UErrorCode &errorCode);
UBool isEmpty() const { return len==0; }
int32_t length() const { return len; }
char operator[](int32_t index) const { return buffer[index]; }
StringPiece toStringPiece() const { return StringPiece(buffer.getAlias(), len); }
const char *data() const { return buffer.getAlias(); }
char *data() { return buffer.getAlias(); }
/** @return last index of c, or -1 if c is not in this string */
int32_t lastIndexOf(char c) const;
CharString &clear() { len=0; buffer[0]=0; return *this; }
CharString &truncate(int32_t newLength);
CharString &append(char c, UErrorCode &errorCode);
CharString &append(const StringPiece &s, UErrorCode &errorCode) {
return append(s.data(), s.length(), errorCode);
}
CharString &append(const CharString &s, UErrorCode &errorCode) {
return append(s.data(), s.length(), errorCode);
}
CharString &append(const char *s, int32_t sLength, UErrorCode &status);
/**
* Returns a writable buffer for appending and writes the buffer's capacity to
* resultCapacity. Guarantees resultCapacity>=minCapacity if U_SUCCESS().
* There will additionally be space for a terminating NUL right at resultCapacity.
* (This function is similar to ByteSink.GetAppendBuffer().)
*
* The returned buffer is only valid until the next write operation
* on this string.
*
* After writing at most resultCapacity bytes, call append() with the
* pointer returned from this function and the number of bytes written.
*
* @param minCapacity required minimum capacity of the returned buffer;
* must be non-negative
* @param desiredCapacityHint desired capacity of the returned buffer;
* must be non-negative
* @param resultCapacity will be set to the capacity of the returned buffer
* @param errorCode in/out error code
* @return a buffer with resultCapacity>=min_capacity
*/
char *getAppendBuffer(int32_t minCapacity,
int32_t desiredCapacityHint,
int32_t &resultCapacity,
UErrorCode &errorCode);
CharString &appendInvariantChars(const UnicodeString &s, UErrorCode &errorCode);
/**
* Appends a filename/path part, e.g., a directory name.
* First appends a U_FILE_SEP_CHAR if necessary.
* Does nothing if s is empty.
*/
CharString &appendPathPart(const StringPiece &s, UErrorCode &errorCode);
/**
* Appends a U_FILE_SEP_CHAR if this string is not empty
* and does not already end with a U_FILE_SEP_CHAR or U_FILE_ALT_SEP_CHAR.
*/
CharString &ensureEndsWithFileSeparator(UErrorCode &errorCode);
private:
MaybeStackArray<char, 40> buffer;
int32_t len;
UBool ensureCapacity(int32_t capacity, int32_t desiredCapacityHint, UErrorCode &errorCode);
CharString(const CharString &other); // forbid copying of this class
CharString &operator=(const CharString &other); // forbid copying of this class
};
U_NAMESPACE_END
#endif
//eof

View File

@ -0,0 +1,160 @@
/*
******************************************************************************
*
* Copyright (C) 2002-2015, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
*
* File cmemory.c ICU Heap allocation.
* All ICU heap allocation, both for C and C++ new of ICU
* class types, comes through these functions.
*
* If you have a need to replace ICU allocation, this is the
* place to do it.
*
* Note that uprv_malloc(0) returns a non-NULL pointer, and
* that a subsequent free of that pointer value is a NOP.
*
******************************************************************************
*/
#include "unicode/uclean.h"
#include "cmemory.h"
#include "putilimp.h"
#include "uassert.h"
#include <stdlib.h>
/* uprv_malloc(0) returns a pointer to this read-only data. */
static const int32_t zeroMem[] = {0, 0, 0, 0, 0, 0};
/* Function Pointers for user-supplied heap functions */
static const void *pContext;
static UMemAllocFn *pAlloc;
static UMemReallocFn *pRealloc;
static UMemFreeFn *pFree;
#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
#include <stdio.h>
static int n=0;
static long b=0;
#endif
#if U_DEBUG
static char gValidMemorySink = 0;
U_CAPI void uprv_checkValidMemory(const void *p, size_t n) {
/*
* Access the memory to ensure that it's all valid.
* Load and save a computed value to try to ensure that the compiler
* does not throw away the whole loop.
* A thread analyzer might complain about un-mutexed access to gValidMemorySink
* which is true but harmless because no one ever uses the value in gValidMemorySink.
*/
const char *s = (const char *)p;
char c = gValidMemorySink;
size_t i;
U_ASSERT(p != NULL);
for(i = 0; i < n; ++i) {
c ^= s[i];
}
gValidMemorySink = c;
}
#endif /* U_DEBUG */
U_CAPI void * U_EXPORT2
uprv_malloc(size_t s) {
#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
#if 1
putchar('>');
fflush(stdout);
#else
fprintf(stderr,"MALLOC\t#%d\t%ul bytes\t%ul total\n", ++n,s,(b+=s)); fflush(stderr);
#endif
#endif
if (s > 0) {
if (pAlloc) {
return (*pAlloc)(pContext, s);
} else {
return uprv_default_malloc(s);
}
} else {
return (void *)zeroMem;
}
}
U_CAPI void * U_EXPORT2
uprv_realloc(void * buffer, size_t size) {
#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
putchar('~');
fflush(stdout);
#endif
if (buffer == zeroMem) {
return uprv_malloc(size);
} else if (size == 0) {
if (pFree) {
(*pFree)(pContext, buffer);
} else {
uprv_default_free(buffer);
}
return (void *)zeroMem;
} else {
if (pRealloc) {
return (*pRealloc)(pContext, buffer, size);
} else {
return uprv_default_realloc(buffer, size);
}
}
}
U_CAPI void U_EXPORT2
uprv_free(void *buffer) {
#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
putchar('<');
fflush(stdout);
#endif
if (buffer != zeroMem) {
if (pFree) {
(*pFree)(pContext, buffer);
} else {
uprv_default_free(buffer);
}
}
}
U_CAPI void * U_EXPORT2
uprv_calloc(size_t num, size_t size) {
void *mem = NULL;
size *= num;
mem = uprv_malloc(size);
if (mem) {
uprv_memset(mem, 0, size);
}
return mem;
}
U_CAPI void U_EXPORT2
u_setMemoryFunctions(const void *context, UMemAllocFn *a, UMemReallocFn *r, UMemFreeFn *f, UErrorCode *status)
{
if (U_FAILURE(*status)) {
return;
}
if (a==NULL || r==NULL || f==NULL) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
pContext = context;
pAlloc = a;
pRealloc = r;
pFree = f;
}
U_CFUNC UBool cmemory_cleanup(void) {
pContext = NULL;
pAlloc = NULL;
pRealloc = NULL;
pFree = NULL;
return TRUE;
}

View File

@ -0,0 +1,651 @@
/*
******************************************************************************
*
* Copyright (C) 1997-2015, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
*
* File CMEMORY.H
*
* Contains stdlib.h/string.h memory functions
*
* @author Bertrand A. Damiba
*
* Modification History:
*
* Date Name Description
* 6/20/98 Bertrand Created.
* 05/03/99 stephen Changed from functions to macros.
*
******************************************************************************
*/
#ifndef CMEMORY_H
#define CMEMORY_H
#include "unicode/utypes.h"
#include <stddef.h>
#include <string.h>
#include "unicode/localpointer.h"
#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
#include <stdio.h>
#endif
#if U_DEBUG
/*
* The C++ standard requires that the source pointer for memcpy() & memmove()
* is valid, not NULL, and not at the end of an allocated memory block.
* In debug mode, we read one byte from the source point to verify that it's
* a valid, readable pointer.
*/
U_CAPI void uprv_checkValidMemory(const void *p, size_t n);
#define uprv_memcpy(dst, src, size) ( \
uprv_checkValidMemory(src, 1), \
U_STANDARD_CPP_NAMESPACE memcpy(dst, src, size))
#define uprv_memmove(dst, src, size) ( \
uprv_checkValidMemory(src, 1), \
U_STANDARD_CPP_NAMESPACE memmove(dst, src, size))
#else
#define uprv_memcpy(dst, src, size) U_STANDARD_CPP_NAMESPACE memcpy(dst, src, size)
#define uprv_memmove(dst, src, size) U_STANDARD_CPP_NAMESPACE memmove(dst, src, size)
#endif /* U_DEBUG */
/**
* \def UPRV_LENGTHOF
* Convenience macro to determine the length of a fixed array at compile-time.
* @param array A fixed length array
* @return The length of the array, in elements
* @internal
*/
#define UPRV_LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
#define uprv_memset(buffer, mark, size) U_STANDARD_CPP_NAMESPACE memset(buffer, mark, size)
#define uprv_memcmp(buffer1, buffer2, size) U_STANDARD_CPP_NAMESPACE memcmp(buffer1, buffer2,size)
U_CAPI void * U_EXPORT2
uprv_malloc(size_t s) U_MALLOC_ATTR U_ALLOC_SIZE_ATTR(1);
U_CAPI void * U_EXPORT2
uprv_realloc(void *mem, size_t size) U_ALLOC_SIZE_ATTR(2);
U_CAPI void U_EXPORT2
uprv_free(void *mem);
U_CAPI void * U_EXPORT2
uprv_calloc(size_t num, size_t size) U_MALLOC_ATTR U_ALLOC_SIZE_ATTR2(1,2);
/**
* This should align the memory properly on any machine.
* This is very useful for the safeClone functions.
*/
typedef union {
long t1;
double t2;
void *t3;
} UAlignedMemory;
/**
* Get the least significant bits of a pointer (a memory address).
* For example, with a mask of 3, the macro gets the 2 least significant bits,
* which will be 0 if the pointer is 32-bit (4-byte) aligned.
*
* ptrdiff_t is the most appropriate integer type to cast to.
* size_t should work too, since on most (or all?) platforms it has the same
* width as ptrdiff_t.
*/
#define U_POINTER_MASK_LSB(ptr, mask) (((ptrdiff_t)(char *)(ptr)) & (mask))
/**
* Get the amount of bytes that a pointer is off by from
* the previous UAlignedMemory-aligned pointer.
*/
#define U_ALIGNMENT_OFFSET(ptr) U_POINTER_MASK_LSB(ptr, sizeof(UAlignedMemory) - 1)
/**
* Get the amount of bytes to add to a pointer
* in order to get the next UAlignedMemory-aligned address.
*/
#define U_ALIGNMENT_OFFSET_UP(ptr) (sizeof(UAlignedMemory) - U_ALIGNMENT_OFFSET(ptr))
/**
* Heap clean up function, called from u_cleanup()
* Clears any user heap functions from u_setMemoryFunctions()
* Does NOT deallocate any remaining allocated memory.
*/
U_CFUNC UBool
cmemory_cleanup(void);
/**
* A function called by <TT>uhash_remove</TT>,
* <TT>uhash_close</TT>, or <TT>uhash_put</TT> to delete
* an existing key or value.
* @param obj A key or value stored in a hashtable
* @see uprv_deleteUObject
*/
typedef void U_CALLCONV UObjectDeleter(void* obj);
/**
* Deleter for UObject instances.
* Works for all subclasses of UObject because it has a virtual destructor.
*/
U_CAPI void U_EXPORT2
uprv_deleteUObject(void *obj);
#ifdef __cplusplus
U_NAMESPACE_BEGIN
/**
* "Smart pointer" class, deletes memory via uprv_free().
* For most methods see the LocalPointerBase base class.
* Adds operator[] for array item access.
*
* @see LocalPointerBase
*/
template<typename T>
class LocalMemory : public LocalPointerBase<T> {
public:
/**
* Constructor takes ownership.
* @param p simple pointer to an array of T items that is adopted
*/
explicit LocalMemory(T *p=NULL) : LocalPointerBase<T>(p) {}
#if U_HAVE_RVALUE_REFERENCES
/**
* Move constructor, leaves src with isNull().
* @param src source smart pointer
*/
LocalMemory(LocalMemory<T> &&src) U_NOEXCEPT : LocalPointerBase<T>(src.ptr) {
src.ptr=NULL;
}
#endif
/**
* Destructor deletes the memory it owns.
*/
~LocalMemory() {
uprv_free(LocalPointerBase<T>::ptr);
}
#if U_HAVE_RVALUE_REFERENCES
/**
* Move assignment operator, leaves src with isNull().
* The behavior is undefined if *this and src are the same object.
* @param src source smart pointer
* @return *this
*/
LocalMemory<T> &operator=(LocalMemory<T> &&src) U_NOEXCEPT {
return moveFrom(src);
}
#endif
/**
* Move assignment, leaves src with isNull().
* The behavior is undefined if *this and src are the same object.
*
* Can be called explicitly, does not need C++11 support.
* @param src source smart pointer
* @return *this
*/
LocalMemory<T> &moveFrom(LocalMemory<T> &src) U_NOEXCEPT {
delete[] LocalPointerBase<T>::ptr;
LocalPointerBase<T>::ptr=src.ptr;
src.ptr=NULL;
return *this;
}
/**
* Swap pointers.
* @param other other smart pointer
*/
void swap(LocalMemory<T> &other) U_NOEXCEPT {
T *temp=LocalPointerBase<T>::ptr;
LocalPointerBase<T>::ptr=other.ptr;
other.ptr=temp;
}
/**
* Non-member LocalMemory swap function.
* @param p1 will get p2's pointer
* @param p2 will get p1's pointer
*/
friend inline void swap(LocalMemory<T> &p1, LocalMemory<T> &p2) U_NOEXCEPT {
p1.swap(p2);
}
/**
* Deletes the array it owns,
* and adopts (takes ownership of) the one passed in.
* @param p simple pointer to an array of T items that is adopted
*/
void adoptInstead(T *p) {
uprv_free(LocalPointerBase<T>::ptr);
LocalPointerBase<T>::ptr=p;
}
/**
* Deletes the array it owns, allocates a new one and reset its bytes to 0.
* Returns the new array pointer.
* If the allocation fails, then the current array is unchanged and
* this method returns NULL.
* @param newCapacity must be >0
* @return the allocated array pointer, or NULL if the allocation failed
*/
inline T *allocateInsteadAndReset(int32_t newCapacity=1);
/**
* Deletes the array it owns and allocates a new one, copying length T items.
* Returns the new array pointer.
* If the allocation fails, then the current array is unchanged and
* this method returns NULL.
* @param newCapacity must be >0
* @param length number of T items to be copied from the old array to the new one;
* must be no more than the capacity of the old array,
* which the caller must track because the LocalMemory does not track it
* @return the allocated array pointer, or NULL if the allocation failed
*/
inline T *allocateInsteadAndCopy(int32_t newCapacity=1, int32_t length=0);
/**
* Array item access (writable).
* No index bounds check.
* @param i array index
* @return reference to the array item
*/
T &operator[](ptrdiff_t i) const { return LocalPointerBase<T>::ptr[i]; }
};
template<typename T>
inline T *LocalMemory<T>::allocateInsteadAndReset(int32_t newCapacity) {
if(newCapacity>0) {
T *p=(T *)uprv_malloc(newCapacity*sizeof(T));
if(p!=NULL) {
uprv_memset(p, 0, newCapacity*sizeof(T));
uprv_free(LocalPointerBase<T>::ptr);
LocalPointerBase<T>::ptr=p;
}
return p;
} else {
return NULL;
}
}
template<typename T>
inline T *LocalMemory<T>::allocateInsteadAndCopy(int32_t newCapacity, int32_t length) {
if(newCapacity>0) {
T *p=(T *)uprv_malloc(newCapacity*sizeof(T));
if(p!=NULL) {
if(length>0) {
if(length>newCapacity) {
length=newCapacity;
}
uprv_memcpy(p, LocalPointerBase<T>::ptr, length*sizeof(T));
}
uprv_free(LocalPointerBase<T>::ptr);
LocalPointerBase<T>::ptr=p;
}
return p;
} else {
return NULL;
}
}
/**
* Simple array/buffer management class using uprv_malloc() and uprv_free().
* Provides an internal array with fixed capacity. Can alias another array
* or allocate one.
*
* The array address is properly aligned for type T. It might not be properly
* aligned for types larger than T (or larger than the largest subtype of T).
*
* Unlike LocalMemory and LocalArray, this class never adopts
* (takes ownership of) another array.
*/
template<typename T, int32_t stackCapacity>
class MaybeStackArray {
public:
/**
* Default constructor initializes with internal T[stackCapacity] buffer.
*/
MaybeStackArray() : ptr(stackArray), capacity(stackCapacity), needToRelease(FALSE) {}
/**
* Destructor deletes the array (if owned).
*/
~MaybeStackArray() { releaseArray(); }
/**
* Returns the array capacity (number of T items).
* @return array capacity
*/
int32_t getCapacity() const { return capacity; }
/**
* Access without ownership change.
* @return the array pointer
*/
T *getAlias() const { return ptr; }
/**
* Returns the array limit. Simple convenience method.
* @return getAlias()+getCapacity()
*/
T *getArrayLimit() const { return getAlias()+capacity; }
// No "operator T *() const" because that can make
// expressions like mbs[index] ambiguous for some compilers.
/**
* Array item access (const).
* No index bounds check.
* @param i array index
* @return reference to the array item
*/
const T &operator[](ptrdiff_t i) const { return ptr[i]; }
/**
* Array item access (writable).
* No index bounds check.
* @param i array index
* @return reference to the array item
*/
T &operator[](ptrdiff_t i) { return ptr[i]; }
/**
* Deletes the array (if owned) and aliases another one, no transfer of ownership.
* If the arguments are illegal, then the current array is unchanged.
* @param otherArray must not be NULL
* @param otherCapacity must be >0
*/
void aliasInstead(T *otherArray, int32_t otherCapacity) {
if(otherArray!=NULL && otherCapacity>0) {
releaseArray();
ptr=otherArray;
capacity=otherCapacity;
needToRelease=FALSE;
}
}
/**
* Deletes the array (if owned) and allocates a new one, copying length T items.
* Returns the new array pointer.
* If the allocation fails, then the current array is unchanged and
* this method returns NULL.
* @param newCapacity can be less than or greater than the current capacity;
* must be >0
* @param length number of T items to be copied from the old array to the new one
* @return the allocated array pointer, or NULL if the allocation failed
*/
inline T *resize(int32_t newCapacity, int32_t length=0);
/**
* Gives up ownership of the array if owned, or else clones it,
* copying length T items; resets itself to the internal stack array.
* Returns NULL if the allocation failed.
* @param length number of T items to copy when cloning,
* and capacity of the clone when cloning
* @param resultCapacity will be set to the returned array's capacity (output-only)
* @return the array pointer;
* caller becomes responsible for deleting the array
*/
inline T *orphanOrClone(int32_t length, int32_t &resultCapacity);
private:
T *ptr;
int32_t capacity;
UBool needToRelease;
T stackArray[stackCapacity];
void releaseArray() {
if(needToRelease) {
uprv_free(ptr);
}
}
/* No comparison operators with other MaybeStackArray's. */
bool operator==(const MaybeStackArray & /*other*/) {return FALSE;}
bool operator!=(const MaybeStackArray & /*other*/) {return TRUE;}
/* No ownership transfer: No copy constructor, no assignment operator. */
MaybeStackArray(const MaybeStackArray & /*other*/) {}
void operator=(const MaybeStackArray & /*other*/) {}
// No heap allocation. Use only on the stack.
// (Declaring these functions private triggers a cascade of problems:
// MSVC insists on exporting an instantiation of MaybeStackArray, which
// requires that all functions be defined.
// An empty implementation of new() is rejected, it must return a value.
// Returning NULL is rejected by gcc for operator new.
// The expedient thing is just not to override operator new.
// While relatively pointless, heap allocated instances will function.
// static void * U_EXPORT2 operator new(size_t size);
// static void * U_EXPORT2 operator new[](size_t size);
#if U_HAVE_PLACEMENT_NEW
// static void * U_EXPORT2 operator new(size_t, void *ptr);
#endif
};
template<typename T, int32_t stackCapacity>
inline T *MaybeStackArray<T, stackCapacity>::resize(int32_t newCapacity, int32_t length) {
if(newCapacity>0) {
#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
::fprintf(::stderr,"MaybeStacArray (resize) alloc %d * %lu\n", newCapacity,sizeof(T));
#endif
T *p=(T *)uprv_malloc(newCapacity*sizeof(T));
if(p!=NULL) {
if(length>0) {
if(length>capacity) {
length=capacity;
}
if(length>newCapacity) {
length=newCapacity;
}
uprv_memcpy(p, ptr, length*sizeof(T));
}
releaseArray();
ptr=p;
capacity=newCapacity;
needToRelease=TRUE;
}
return p;
} else {
return NULL;
}
}
template<typename T, int32_t stackCapacity>
inline T *MaybeStackArray<T, stackCapacity>::orphanOrClone(int32_t length, int32_t &resultCapacity) {
T *p;
if(needToRelease) {
p=ptr;
} else if(length<=0) {
return NULL;
} else {
if(length>capacity) {
length=capacity;
}
p=(T *)uprv_malloc(length*sizeof(T));
#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
::fprintf(::stderr,"MaybeStacArray (orphan) alloc %d * %lu\n", length,sizeof(T));
#endif
if(p==NULL) {
return NULL;
}
uprv_memcpy(p, ptr, length*sizeof(T));
}
resultCapacity=length;
ptr=stackArray;
capacity=stackCapacity;
needToRelease=FALSE;
return p;
}
/**
* Variant of MaybeStackArray that allocates a header struct and an array
* in one contiguous memory block, using uprv_malloc() and uprv_free().
* Provides internal memory with fixed array capacity. Can alias another memory
* block or allocate one.
* The stackCapacity is the number of T items in the internal memory,
* not counting the H header.
* Unlike LocalMemory and LocalArray, this class never adopts
* (takes ownership of) another memory block.
*/
template<typename H, typename T, int32_t stackCapacity>
class MaybeStackHeaderAndArray {
public:
/**
* Default constructor initializes with internal H+T[stackCapacity] buffer.
*/
MaybeStackHeaderAndArray() : ptr(&stackHeader), capacity(stackCapacity), needToRelease(FALSE) {}
/**
* Destructor deletes the memory (if owned).
*/
~MaybeStackHeaderAndArray() { releaseMemory(); }
/**
* Returns the array capacity (number of T items).
* @return array capacity
*/
int32_t getCapacity() const { return capacity; }
/**
* Access without ownership change.
* @return the header pointer
*/
H *getAlias() const { return ptr; }
/**
* Returns the array start.
* @return array start, same address as getAlias()+1
*/
T *getArrayStart() const { return reinterpret_cast<T *>(getAlias()+1); }
/**
* Returns the array limit.
* @return array limit
*/
T *getArrayLimit() const { return getArrayStart()+capacity; }
/**
* Access without ownership change. Same as getAlias().
* A class instance can be used directly in expressions that take a T *.
* @return the header pointer
*/
operator H *() const { return ptr; }
/**
* Array item access (writable).
* No index bounds check.
* @param i array index
* @return reference to the array item
*/
T &operator[](ptrdiff_t i) { return getArrayStart()[i]; }
/**
* Deletes the memory block (if owned) and aliases another one, no transfer of ownership.
* If the arguments are illegal, then the current memory is unchanged.
* @param otherArray must not be NULL
* @param otherCapacity must be >0
*/
void aliasInstead(H *otherMemory, int32_t otherCapacity) {
if(otherMemory!=NULL && otherCapacity>0) {
releaseMemory();
ptr=otherMemory;
capacity=otherCapacity;
needToRelease=FALSE;
}
}
/**
* Deletes the memory block (if owned) and allocates a new one,
* copying the header and length T array items.
* Returns the new header pointer.
* If the allocation fails, then the current memory is unchanged and
* this method returns NULL.
* @param newCapacity can be less than or greater than the current capacity;
* must be >0
* @param length number of T items to be copied from the old array to the new one
* @return the allocated pointer, or NULL if the allocation failed
*/
inline H *resize(int32_t newCapacity, int32_t length=0);
/**
* Gives up ownership of the memory if owned, or else clones it,
* copying the header and length T array items; resets itself to the internal memory.
* Returns NULL if the allocation failed.
* @param length number of T items to copy when cloning,
* and array capacity of the clone when cloning
* @param resultCapacity will be set to the returned array's capacity (output-only)
* @return the header pointer;
* caller becomes responsible for deleting the array
*/
inline H *orphanOrClone(int32_t length, int32_t &resultCapacity);
private:
H *ptr;
int32_t capacity;
UBool needToRelease;
// stackHeader must precede stackArray immediately.
H stackHeader;
T stackArray[stackCapacity];
void releaseMemory() {
if(needToRelease) {
uprv_free(ptr);
}
}
/* No comparison operators with other MaybeStackHeaderAndArray's. */
bool operator==(const MaybeStackHeaderAndArray & /*other*/) {return FALSE;}
bool operator!=(const MaybeStackHeaderAndArray & /*other*/) {return TRUE;}
/* No ownership transfer: No copy constructor, no assignment operator. */
MaybeStackHeaderAndArray(const MaybeStackHeaderAndArray & /*other*/) {}
void operator=(const MaybeStackHeaderAndArray & /*other*/) {}
// No heap allocation. Use only on the stack.
// (Declaring these functions private triggers a cascade of problems;
// see the MaybeStackArray class for details.)
// static void * U_EXPORT2 operator new(size_t size);
// static void * U_EXPORT2 operator new[](size_t size);
#if U_HAVE_PLACEMENT_NEW
// static void * U_EXPORT2 operator new(size_t, void *ptr);
#endif
};
template<typename H, typename T, int32_t stackCapacity>
inline H *MaybeStackHeaderAndArray<H, T, stackCapacity>::resize(int32_t newCapacity,
int32_t length) {
if(newCapacity>=0) {
#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
::fprintf(::stderr,"MaybeStackHeaderAndArray alloc %d + %d * %ul\n", sizeof(H),newCapacity,sizeof(T));
#endif
H *p=(H *)uprv_malloc(sizeof(H)+newCapacity*sizeof(T));
if(p!=NULL) {
if(length<0) {
length=0;
} else if(length>0) {
if(length>capacity) {
length=capacity;
}
if(length>newCapacity) {
length=newCapacity;
}
}
uprv_memcpy(p, ptr, sizeof(H)+length*sizeof(T));
releaseMemory();
ptr=p;
capacity=newCapacity;
needToRelease=TRUE;
}
return p;
} else {
return NULL;
}
}
template<typename H, typename T, int32_t stackCapacity>
inline H *MaybeStackHeaderAndArray<H, T, stackCapacity>::orphanOrClone(int32_t length,
int32_t &resultCapacity) {
H *p;
if(needToRelease) {
p=ptr;
} else {
if(length<0) {
length=0;
} else if(length>capacity) {
length=capacity;
}
#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
::fprintf(::stderr,"MaybeStackHeaderAndArray (orphan) alloc %ul + %d * %lu\n", sizeof(H),length,sizeof(T));
#endif
p=(H *)uprv_malloc(sizeof(H)+length*sizeof(T));
if(p==NULL) {
return NULL;
}
uprv_memcpy(p, ptr, sizeof(H)+length*sizeof(T));
}
resultCapacity=length;
ptr=&stackHeader;
capacity=stackCapacity;
needToRelease=FALSE;
return p;
}
U_NAMESPACE_END
#endif /* __cplusplus */
#endif /* CMEMORY_H */

View File

@ -0,0 +1,95 @@
/*
******************************************************************************
*
* Copyright (C) 1997-2011, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
* file name: cpputils.h
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*/
#ifndef CPPUTILS_H
#define CPPUTILS_H
#include "unicode/utypes.h"
#include "unicode/unistr.h"
#include "cmemory.h"
/*==========================================================================*/
/* Array copy utility functions */
/*==========================================================================*/
static
inline void uprv_arrayCopy(const double* src, double* dst, int32_t count)
{ uprv_memcpy(dst, src, (size_t)(count * sizeof(*src))); }
static
inline void uprv_arrayCopy(const double* src, int32_t srcStart,
double* dst, int32_t dstStart, int32_t count)
{ uprv_memcpy(dst+dstStart, src+srcStart, (size_t)(count * sizeof(*src))); }
static
inline void uprv_arrayCopy(const int8_t* src, int8_t* dst, int32_t count)
{ uprv_memcpy(dst, src, (size_t)(count * sizeof(*src))); }
static
inline void uprv_arrayCopy(const int8_t* src, int32_t srcStart,
int8_t* dst, int32_t dstStart, int32_t count)
{ uprv_memcpy(dst+dstStart, src+srcStart, (size_t)(count * sizeof(*src))); }
static
inline void uprv_arrayCopy(const int16_t* src, int16_t* dst, int32_t count)
{ uprv_memcpy(dst, src, (size_t)(count * sizeof(*src))); }
static
inline void uprv_arrayCopy(const int16_t* src, int32_t srcStart,
int16_t* dst, int32_t dstStart, int32_t count)
{ uprv_memcpy(dst+dstStart, src+srcStart, (size_t)(count * sizeof(*src))); }
static
inline void uprv_arrayCopy(const int32_t* src, int32_t* dst, int32_t count)
{ uprv_memcpy(dst, src, (size_t)(count * sizeof(*src))); }
static
inline void uprv_arrayCopy(const int32_t* src, int32_t srcStart,
int32_t* dst, int32_t dstStart, int32_t count)
{ uprv_memcpy(dst+dstStart, src+srcStart, (size_t)(count * sizeof(*src))); }
static
inline void
uprv_arrayCopy(const UChar *src, int32_t srcStart,
UChar *dst, int32_t dstStart, int32_t count)
{ uprv_memcpy(dst+dstStart, src+srcStart, (size_t)(count * sizeof(*src))); }
/**
* Copy an array of UnicodeString OBJECTS (not pointers).
* @internal
*/
static inline void
uprv_arrayCopy(const icu::UnicodeString *src, icu::UnicodeString *dst, int32_t count)
{ while(count-- > 0) *dst++ = *src++; }
/**
* Copy an array of UnicodeString OBJECTS (not pointers).
* @internal
*/
static inline void
uprv_arrayCopy(const icu::UnicodeString *src, int32_t srcStart,
icu::UnicodeString *dst, int32_t dstStart, int32_t count)
{ uprv_arrayCopy(src+srcStart, dst+dstStart, count); }
/**
* Checks that the string is readable and writable.
* Sets U_ILLEGAL_ARGUMENT_ERROR if the string isBogus() or has an open getBuffer().
*/
inline void
uprv_checkCanGetBuffer(const icu::UnicodeString &s, UErrorCode &errorCode) {
if(U_SUCCESS(errorCode) && s.isBogus()) {
errorCode=U_ILLEGAL_ARGUMENT_ERROR;
}
}
#endif /* _CPPUTILS */

View File

@ -0,0 +1,339 @@
/*
******************************************************************************
*
* Copyright (C) 1997-2011, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
*
* File CSTRING.C
*
* @author Helena Shih
*
* Modification History:
*
* Date Name Description
* 6/18/98 hshih Created
* 09/08/98 stephen Added include for ctype, for Mac Port
* 11/15/99 helena Integrated S/390 IEEE changes.
******************************************************************************
*/
#include <stdlib.h>
#include <stdio.h>
#include "unicode/utypes.h"
#include "cmemory.h"
#include "cstring.h"
#include "uassert.h"
/*
* We hardcode case conversion for invariant characters to match our expectation
* and the compiler execution charset.
* This prevents problems on systems
* - with non-default casing behavior, like Turkish system locales where
* tolower('I') maps to dotless i and toupper('i') maps to dotted I
* - where there are no lowercase Latin characters at all, or using different
* codes (some old EBCDIC codepages)
*
* This works because the compiler usually runs on a platform where the execution
* charset includes all of the invariant characters at their expected
* code positions, so that the char * string literals in ICU code match
* the char literals here.
*
* Note that the set of lowercase Latin letters is discontiguous in EBCDIC
* and the set of uppercase Latin letters is discontiguous as well.
*/
U_CAPI UBool U_EXPORT2
uprv_isASCIILetter(char c) {
#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
return
('a'<=c && c<='i') || ('j'<=c && c<='r') || ('s'<=c && c<='z') ||
('A'<=c && c<='I') || ('J'<=c && c<='R') || ('S'<=c && c<='Z');
#else
return ('a'<=c && c<='z') || ('A'<=c && c<='Z');
#endif
}
U_CAPI char U_EXPORT2
uprv_toupper(char c) {
#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
if(('a'<=c && c<='i') || ('j'<=c && c<='r') || ('s'<=c && c<='z')) {
c=(char)(c+('A'-'a'));
}
#else
if('a'<=c && c<='z') {
c=(char)(c+('A'-'a'));
}
#endif
return c;
}
#if 0
/*
* Commented out because cstring.h defines uprv_tolower() to be
* the same as either uprv_asciitolower() or uprv_ebcdictolower()
* to reduce the amount of code to cover with tests.
*
* Note that this uprv_tolower() definition is likely to work for most
* charset families, not just ASCII and EBCDIC, because its #else branch
* is written generically.
*/
U_CAPI char U_EXPORT2
uprv_tolower(char c) {
#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
if(('A'<=c && c<='I') || ('J'<=c && c<='R') || ('S'<=c && c<='Z')) {
c=(char)(c+('a'-'A'));
}
#else
if('A'<=c && c<='Z') {
c=(char)(c+('a'-'A'));
}
#endif
return c;
}
#endif
U_CAPI char U_EXPORT2
uprv_asciitolower(char c) {
if(0x41<=c && c<=0x5a) {
c=(char)(c+0x20);
}
return c;
}
U_CAPI char U_EXPORT2
uprv_ebcdictolower(char c) {
if( (0xc1<=(uint8_t)c && (uint8_t)c<=0xc9) ||
(0xd1<=(uint8_t)c && (uint8_t)c<=0xd9) ||
(0xe2<=(uint8_t)c && (uint8_t)c<=0xe9)
) {
c=(char)(c-0x40);
}
return c;
}
U_CAPI char* U_EXPORT2
T_CString_toLowerCase(char* str)
{
char* origPtr = str;
if (str) {
do
*str = (char)uprv_tolower(*str);
while (*(str++));
}
return origPtr;
}
U_CAPI char* U_EXPORT2
T_CString_toUpperCase(char* str)
{
char* origPtr = str;
if (str) {
do
*str = (char)uprv_toupper(*str);
while (*(str++));
}
return origPtr;
}
/*
* Takes a int32_t and fills in a char* string with that number "radix"-based.
* Does not handle negative values (makes an empty string for them).
* Writes at most 12 chars ("-2147483647" plus NUL).
* Returns the length of the string (not including the NUL).
*/
U_CAPI int32_t U_EXPORT2
T_CString_integerToString(char* buffer, int32_t v, int32_t radix)
{
char tbuf[30];
int32_t tbx = sizeof(tbuf);
uint8_t digit;
int32_t length = 0;
uint32_t uval;
U_ASSERT(radix>=2 && radix<=16);
uval = (uint32_t) v;
if(v<0 && radix == 10) {
/* Only in base 10 do we conside numbers to be signed. */
uval = (uint32_t)(-v);
buffer[length++] = '-';
}
tbx = sizeof(tbuf)-1;
tbuf[tbx] = 0; /* We are generating the digits backwards. Null term the end. */
do {
digit = (uint8_t)(uval % radix);
tbuf[--tbx] = (char)(T_CString_itosOffset(digit));
uval = uval / radix;
} while (uval != 0);
/* copy converted number into user buffer */
uprv_strcpy(buffer+length, tbuf+tbx);
length += sizeof(tbuf) - tbx -1;
return length;
}
/*
* Takes a int64_t and fills in a char* string with that number "radix"-based.
* Writes at most 21: chars ("-9223372036854775807" plus NUL).
* Returns the length of the string, not including the terminating NULL.
*/
U_CAPI int32_t U_EXPORT2
T_CString_int64ToString(char* buffer, int64_t v, uint32_t radix)
{
char tbuf[30];
int32_t tbx = sizeof(tbuf);
uint8_t digit;
int32_t length = 0;
uint64_t uval;
U_ASSERT(radix>=2 && radix<=16);
uval = (uint64_t) v;
if(v<0 && radix == 10) {
/* Only in base 10 do we conside numbers to be signed. */
uval = (uint64_t)(-v);
buffer[length++] = '-';
}
tbx = sizeof(tbuf)-1;
tbuf[tbx] = 0; /* We are generating the digits backwards. Null term the end. */
do {
digit = (uint8_t)(uval % radix);
tbuf[--tbx] = (char)(T_CString_itosOffset(digit));
uval = uval / radix;
} while (uval != 0);
/* copy converted number into user buffer */
uprv_strcpy(buffer+length, tbuf+tbx);
length += sizeof(tbuf) - tbx -1;
return length;
}
U_CAPI int32_t U_EXPORT2
T_CString_stringToInteger(const char *integerString, int32_t radix)
{
char *end;
return uprv_strtoul(integerString, &end, radix);
}
U_CAPI int U_EXPORT2
uprv_stricmp(const char *str1, const char *str2) {
if(str1==NULL) {
if(str2==NULL) {
return 0;
} else {
return -1;
}
} else if(str2==NULL) {
return 1;
} else {
/* compare non-NULL strings lexically with lowercase */
int rc;
unsigned char c1, c2;
for(;;) {
c1=(unsigned char)*str1;
c2=(unsigned char)*str2;
if(c1==0) {
if(c2==0) {
return 0;
} else {
return -1;
}
} else if(c2==0) {
return 1;
} else {
/* compare non-zero characters with lowercase */
rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2);
if(rc!=0) {
return rc;
}
}
++str1;
++str2;
}
}
}
U_CAPI int U_EXPORT2
uprv_strnicmp(const char *str1, const char *str2, uint32_t n) {
if(str1==NULL) {
if(str2==NULL) {
return 0;
} else {
return -1;
}
} else if(str2==NULL) {
return 1;
} else {
/* compare non-NULL strings lexically with lowercase */
int rc;
unsigned char c1, c2;
for(; n--;) {
c1=(unsigned char)*str1;
c2=(unsigned char)*str2;
if(c1==0) {
if(c2==0) {
return 0;
} else {
return -1;
}
} else if(c2==0) {
return 1;
} else {
/* compare non-zero characters with lowercase */
rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2);
if(rc!=0) {
return rc;
}
}
++str1;
++str2;
}
}
return 0;
}
U_CAPI char* U_EXPORT2
uprv_strdup(const char *src) {
size_t len = uprv_strlen(src) + 1;
char *dup = (char *) uprv_malloc(len);
if (dup) {
uprv_memcpy(dup, src, len);
}
return dup;
}
U_CAPI char* U_EXPORT2
uprv_strndup(const char *src, int32_t n) {
char *dup;
if(n < 0) {
dup = uprv_strdup(src);
} else {
dup = (char*)uprv_malloc(n+1);
if (dup) {
uprv_memcpy(dup, src, n);
dup[n] = 0;
}
}
return dup;
}

View File

@ -0,0 +1,140 @@
/*
******************************************************************************
*
* Copyright (C) 1997-2012, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
*
* File CSTRING.H
*
* Contains CString interface
*
* @author Helena Shih
*
* Modification History:
*
* Date Name Description
* 6/17/98 hshih Created.
* 05/03/99 stephen Changed from functions to macros.
* 06/14/99 stephen Added icu_strncat, icu_strncmp, icu_tolower
*
******************************************************************************
*/
#ifndef CSTRING_H
#define CSTRING_H 1
#include "unicode/utypes.h"
#include "cmemory.h"
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#define uprv_strcpy(dst, src) U_STANDARD_CPP_NAMESPACE strcpy(dst, src)
#define uprv_strlen(str) U_STANDARD_CPP_NAMESPACE strlen(str)
#define uprv_strcmp(s1, s2) U_STANDARD_CPP_NAMESPACE strcmp(s1, s2)
#define uprv_strcat(dst, src) U_STANDARD_CPP_NAMESPACE strcat(dst, src)
#define uprv_strchr(s, c) U_STANDARD_CPP_NAMESPACE strchr(s, c)
#define uprv_strstr(s, c) U_STANDARD_CPP_NAMESPACE strstr(s, c)
#define uprv_strrchr(s, c) U_STANDARD_CPP_NAMESPACE strrchr(s, c)
#if U_DEBUG
#define uprv_strncpy(dst, src, size) ( \
uprv_checkValidMemory(src, 1), \
U_STANDARD_CPP_NAMESPACE strncpy(dst, src, size))
#define uprv_strncmp(s1, s2, n) ( \
uprv_checkValidMemory(s1, 1), \
uprv_checkValidMemory(s2, 1), \
U_STANDARD_CPP_NAMESPACE strncmp(s1, s2, n))
#define uprv_strncat(dst, src, n) ( \
uprv_checkValidMemory(src, 1), \
U_STANDARD_CPP_NAMESPACE strncat(dst, src, n))
#else
#define uprv_strncpy(dst, src, size) U_STANDARD_CPP_NAMESPACE strncpy(dst, src, size)
#define uprv_strncmp(s1, s2, n) U_STANDARD_CPP_NAMESPACE strncmp(s1, s2, n)
#define uprv_strncat(dst, src, n) U_STANDARD_CPP_NAMESPACE strncat(dst, src, n)
#endif /* U_DEBUG */
/**
* Is c an ASCII-repertoire letter a-z or A-Z?
* Note: The implementation is specific to whether ICU is compiled for
* an ASCII-based or EBCDIC-based machine. There just does not seem to be a better name for this.
*/
U_CAPI UBool U_EXPORT2
uprv_isASCIILetter(char c);
U_CAPI char U_EXPORT2
uprv_toupper(char c);
U_CAPI char U_EXPORT2
uprv_asciitolower(char c);
U_CAPI char U_EXPORT2
uprv_ebcdictolower(char c);
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
# define uprv_tolower uprv_asciitolower
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
# define uprv_tolower uprv_ebcdictolower
#else
# error U_CHARSET_FAMILY is not valid
#endif
#define uprv_strtod(source, end) U_STANDARD_CPP_NAMESPACE strtod(source, end)
#define uprv_strtoul(str, end, base) U_STANDARD_CPP_NAMESPACE strtoul(str, end, base)
#define uprv_strtol(str, end, base) U_STANDARD_CPP_NAMESPACE strtol(str, end, base)
/* Conversion from a digit to the character with radix base from 2-19 */
/* May need to use U_UPPER_ORDINAL*/
#define T_CString_itosOffset(a) ((a)<=9?('0'+(a)):('A'+(a)-10))
U_CAPI char* U_EXPORT2
uprv_strdup(const char *src);
/**
* uprv_malloc n+1 bytes, and copy n bytes from src into the new string.
* Terminate with a null at offset n. If n is -1, works like uprv_strdup
* @param src
* @param n length of the input string, not including null.
* @return new string (owned by caller, use uprv_free to free).
* @internal
*/
U_CAPI char* U_EXPORT2
uprv_strndup(const char *src, int32_t n);
U_CAPI char* U_EXPORT2
T_CString_toLowerCase(char* str);
U_CAPI char* U_EXPORT2
T_CString_toUpperCase(char* str);
U_CAPI int32_t U_EXPORT2
T_CString_integerToString(char *buffer, int32_t n, int32_t radix);
U_CAPI int32_t U_EXPORT2
T_CString_int64ToString(char *buffer, int64_t n, uint32_t radix);
U_CAPI int32_t U_EXPORT2
T_CString_stringToInteger(const char *integerString, int32_t radix);
/**
* Case-insensitive, language-independent string comparison
* limited to the ASCII character repertoire.
*/
U_CAPI int U_EXPORT2
uprv_stricmp(const char *str1, const char *str2);
/**
* Case-insensitive, language-independent string comparison
* limited to the ASCII character repertoire.
*/
U_CAPI int U_EXPORT2
uprv_strnicmp(const char *str1, const char *str2, uint32_t n);
#endif /* ! CSTRING_H */

View File

@ -0,0 +1,53 @@
/*
******************************************************************************
*
* Copyright (C) 2001, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
* file name: cwchar.c
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2001may25
* created by: Markus W. Scherer
*/
#include "unicode/utypes.h"
#if !U_HAVE_WCSCPY
#include "cwchar.h"
U_CAPI wchar_t *uprv_wcscat(wchar_t *dst, const wchar_t *src) {
wchar_t *start=dst;
while(*dst!=0) {
++dst;
}
while((*dst=*src)!=0) {
++dst;
++src;
}
return start;
}
U_CAPI wchar_t *uprv_wcscpy(wchar_t *dst, const wchar_t *src) {
wchar_t *start=dst;
while((*dst=*src)!=0) {
++dst;
++src;
}
return start;
}
U_CAPI size_t uprv_wcslen(const wchar_t *src) {
const wchar_t *start=src;
while(*src!=0) {
++src;
}
return src-start;
}
#endif

View File

@ -0,0 +1,56 @@
/*
******************************************************************************
*
* Copyright (C) 2001, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
* file name: cwchar.h
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2001may25
* created by: Markus W. Scherer
*
* This file contains ICU-internal definitions of wchar_t operations.
* These definitions were moved here from cstring.h so that fewer
* ICU implementation files include wchar.h.
*/
#ifndef __CWCHAR_H__
#define __CWCHAR_H__
#include <string.h>
#include <stdlib.h>
#include "unicode/utypes.h"
/* Do this after utypes.h so that we have U_HAVE_WCHAR_H . */
#if U_HAVE_WCHAR_H
# include <wchar.h>
#endif
/*===========================================================================*/
/* Wide-character functions */
/*===========================================================================*/
/* The following are not available on all systems, defined in wchar.h or string.h. */
#if U_HAVE_WCSCPY
# define uprv_wcscpy wcscpy
# define uprv_wcscat wcscat
# define uprv_wcslen wcslen
#else
U_CAPI wchar_t* U_EXPORT2
uprv_wcscpy(wchar_t *dst, const wchar_t *src);
U_CAPI wchar_t* U_EXPORT2
uprv_wcscat(wchar_t *dst, const wchar_t *src);
U_CAPI size_t U_EXPORT2
uprv_wcslen(const wchar_t *src);
#endif
/* The following are part of the ANSI C standard, defined in stdlib.h . */
#define uprv_wcstombs(mbstr, wcstr, count) U_STANDARD_CPP_NAMESPACE wcstombs(mbstr, wcstr, count)
#define uprv_mbstowcs(wcstr, mbstr, count) U_STANDARD_CPP_NAMESPACE mbstowcs(wcstr, mbstr, count)
#endif

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,427 @@
/**
*******************************************************************************
* Copyright (C) 2006-2014, International Business Machines Corporation *
* and others. All Rights Reserved. *
*******************************************************************************
*/
#ifndef DICTBE_H
#define DICTBE_H
#include "unicode/utypes.h"
#include "unicode/uniset.h"
#include "unicode/utext.h"
#include "brkeng.h"
U_NAMESPACE_BEGIN
class DictionaryMatcher;
class Normalizer2;
/*******************************************************************
* DictionaryBreakEngine
*/
/**
* <p>DictionaryBreakEngine is a kind of LanguageBreakEngine that uses a
* dictionary to determine language-specific breaks.</p>
*
* <p>After it is constructed a DictionaryBreakEngine may be shared between
* threads without synchronization.</p>
*/
class DictionaryBreakEngine : public LanguageBreakEngine {
private:
/**
* The set of characters handled by this engine
* @internal
*/
UnicodeSet fSet;
/**
* The set of break types handled by this engine
* @internal
*/
uint32_t fTypes;
/**
* <p>Default constructor.</p>
*
*/
DictionaryBreakEngine();
public:
/**
* <p>Constructor setting the break types handled.</p>
*
* @param breakTypes A bitmap of types handled by the engine.
*/
DictionaryBreakEngine( uint32_t breakTypes );
/**
* <p>Virtual destructor.</p>
*/
virtual ~DictionaryBreakEngine();
/**
* <p>Indicate whether this engine handles a particular character for
* a particular kind of break.</p>
*
* @param c A character which begins a run that the engine might handle
* @param breakType The type of text break which the caller wants to determine
* @return TRUE if this engine handles the particular character and break
* type.
*/
virtual UBool handles( UChar32 c, int32_t breakType ) const;
/**
* <p>Find any breaks within a run in the supplied text.</p>
*
* @param text A UText representing the text. The iterator is left at
* the end of the run of characters which the engine is capable of handling
* that starts from the first (or last) character in the range.
* @param startPos The start of the run within the supplied text.
* @param endPos The end of the run within the supplied text.
* @param reverse Whether the caller is looking for breaks in a reverse
* direction.
* @param breakType The type of break desired, or -1.
* @param foundBreaks An allocated C array of the breaks found, if any
* @return The number of breaks found.
*/
virtual int32_t findBreaks( UText *text,
int32_t startPos,
int32_t endPos,
UBool reverse,
int32_t breakType,
UStack &foundBreaks ) const;
protected:
/**
* <p>Set the character set handled by this engine.</p>
*
* @param set A UnicodeSet of the set of characters handled by the engine
*/
virtual void setCharacters( const UnicodeSet &set );
/**
* <p>Set the break types handled by this engine.</p>
*
* @param breakTypes A bitmap of types handled by the engine.
*/
// virtual void setBreakTypes( uint32_t breakTypes );
/**
* <p>Divide up a range of known dictionary characters handled by this break engine.</p>
*
* @param text A UText representing the text
* @param rangeStart The start of the range of dictionary characters
* @param rangeEnd The end of the range of dictionary characters
* @param foundBreaks Output of C array of int32_t break positions, or 0
* @return The number of breaks found
*/
virtual int32_t divideUpDictionaryRange( UText *text,
int32_t rangeStart,
int32_t rangeEnd,
UStack &foundBreaks ) const = 0;
};
/*******************************************************************
* ThaiBreakEngine
*/
/**
* <p>ThaiBreakEngine is a kind of DictionaryBreakEngine that uses a
* dictionary and heuristics to determine Thai-specific breaks.</p>
*
* <p>After it is constructed a ThaiBreakEngine may be shared between
* threads without synchronization.</p>
*/
class ThaiBreakEngine : public DictionaryBreakEngine {
private:
/**
* The set of characters handled by this engine
* @internal
*/
UnicodeSet fThaiWordSet;
UnicodeSet fEndWordSet;
UnicodeSet fBeginWordSet;
UnicodeSet fSuffixSet;
UnicodeSet fMarkSet;
DictionaryMatcher *fDictionary;
public:
/**
* <p>Default constructor.</p>
*
* @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the
* engine is deleted.
*/
ThaiBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status);
/**
* <p>Virtual destructor.</p>
*/
virtual ~ThaiBreakEngine();
protected:
/**
* <p>Divide up a range of known dictionary characters handled by this break engine.</p>
*
* @param text A UText representing the text
* @param rangeStart The start of the range of dictionary characters
* @param rangeEnd The end of the range of dictionary characters
* @param foundBreaks Output of C array of int32_t break positions, or 0
* @return The number of breaks found
*/
virtual int32_t divideUpDictionaryRange( UText *text,
int32_t rangeStart,
int32_t rangeEnd,
UStack &foundBreaks ) const;
};
/*******************************************************************
* LaoBreakEngine
*/
/**
* <p>LaoBreakEngine is a kind of DictionaryBreakEngine that uses a
* dictionary and heuristics to determine Lao-specific breaks.</p>
*
* <p>After it is constructed a LaoBreakEngine may be shared between
* threads without synchronization.</p>
*/
class LaoBreakEngine : public DictionaryBreakEngine {
private:
/**
* The set of characters handled by this engine
* @internal
*/
UnicodeSet fLaoWordSet;
UnicodeSet fEndWordSet;
UnicodeSet fBeginWordSet;
UnicodeSet fMarkSet;
DictionaryMatcher *fDictionary;
public:
/**
* <p>Default constructor.</p>
*
* @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the
* engine is deleted.
*/
LaoBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status);
/**
* <p>Virtual destructor.</p>
*/
virtual ~LaoBreakEngine();
protected:
/**
* <p>Divide up a range of known dictionary characters handled by this break engine.</p>
*
* @param text A UText representing the text
* @param rangeStart The start of the range of dictionary characters
* @param rangeEnd The end of the range of dictionary characters
* @param foundBreaks Output of C array of int32_t break positions, or 0
* @return The number of breaks found
*/
virtual int32_t divideUpDictionaryRange( UText *text,
int32_t rangeStart,
int32_t rangeEnd,
UStack &foundBreaks ) const;
};
/*******************************************************************
* BurmeseBreakEngine
*/
/**
* <p>BurmeseBreakEngine is a kind of DictionaryBreakEngine that uses a
* DictionaryMatcher and heuristics to determine Burmese-specific breaks.</p>
*
* <p>After it is constructed a BurmeseBreakEngine may be shared between
* threads without synchronization.</p>
*/
class BurmeseBreakEngine : public DictionaryBreakEngine {
private:
/**
* The set of characters handled by this engine
* @internal
*/
UnicodeSet fBurmeseWordSet;
UnicodeSet fEndWordSet;
UnicodeSet fBeginWordSet;
UnicodeSet fMarkSet;
DictionaryMatcher *fDictionary;
public:
/**
* <p>Default constructor.</p>
*
* @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the
* engine is deleted.
*/
BurmeseBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status);
/**
* <p>Virtual destructor.</p>
*/
virtual ~BurmeseBreakEngine();
protected:
/**
* <p>Divide up a range of known dictionary characters.</p>
*
* @param text A UText representing the text
* @param rangeStart The start of the range of dictionary characters
* @param rangeEnd The end of the range of dictionary characters
* @param foundBreaks Output of C array of int32_t break positions, or 0
* @return The number of breaks found
*/
virtual int32_t divideUpDictionaryRange( UText *text,
int32_t rangeStart,
int32_t rangeEnd,
UStack &foundBreaks ) const;
};
/*******************************************************************
* KhmerBreakEngine
*/
/**
* <p>KhmerBreakEngine is a kind of DictionaryBreakEngine that uses a
* DictionaryMatcher and heuristics to determine Khmer-specific breaks.</p>
*
* <p>After it is constructed a KhmerBreakEngine may be shared between
* threads without synchronization.</p>
*/
class KhmerBreakEngine : public DictionaryBreakEngine {
private:
/**
* The set of characters handled by this engine
* @internal
*/
UnicodeSet fKhmerWordSet;
UnicodeSet fEndWordSet;
UnicodeSet fBeginWordSet;
UnicodeSet fMarkSet;
DictionaryMatcher *fDictionary;
public:
/**
* <p>Default constructor.</p>
*
* @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the
* engine is deleted.
*/
KhmerBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status);
/**
* <p>Virtual destructor.</p>
*/
virtual ~KhmerBreakEngine();
protected:
/**
* <p>Divide up a range of known dictionary characters.</p>
*
* @param text A UText representing the text
* @param rangeStart The start of the range of dictionary characters
* @param rangeEnd The end of the range of dictionary characters
* @param foundBreaks Output of C array of int32_t break positions, or 0
* @return The number of breaks found
*/
virtual int32_t divideUpDictionaryRange( UText *text,
int32_t rangeStart,
int32_t rangeEnd,
UStack &foundBreaks ) const;
};
#if !UCONFIG_NO_NORMALIZATION
/*******************************************************************
* CjkBreakEngine
*/
//indicates language/script that the CjkBreakEngine will handle
enum LanguageType {
kKorean,
kChineseJapanese
};
/**
* <p>CjkBreakEngine is a kind of DictionaryBreakEngine that uses a
* dictionary with costs associated with each word and
* Viterbi decoding to determine CJK-specific breaks.</p>
*/
class CjkBreakEngine : public DictionaryBreakEngine {
protected:
/**
* The set of characters handled by this engine
* @internal
*/
UnicodeSet fHangulWordSet;
UnicodeSet fHanWordSet;
UnicodeSet fKatakanaWordSet;
UnicodeSet fHiraganaWordSet;
DictionaryMatcher *fDictionary;
const Normalizer2 *nfkcNorm2;
public:
/**
* <p>Default constructor.</p>
*
* @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the
* engine is deleted. The DictionaryMatcher must contain costs for each word
* in order for the dictionary to work properly.
*/
CjkBreakEngine(DictionaryMatcher *adoptDictionary, LanguageType type, UErrorCode &status);
/**
* <p>Virtual destructor.</p>
*/
virtual ~CjkBreakEngine();
protected:
/**
* <p>Divide up a range of known dictionary characters handled by this break engine.</p>
*
* @param text A UText representing the text
* @param rangeStart The start of the range of dictionary characters
* @param rangeEnd The end of the range of dictionary characters
* @param foundBreaks Output of C array of int32_t break positions, or 0
* @return The number of breaks found
*/
virtual int32_t divideUpDictionaryRange( UText *text,
int32_t rangeStart,
int32_t rangeEnd,
UStack &foundBreaks ) const;
};
#endif
U_NAMESPACE_END
/* DICTBE_H */
#endif

View File

@ -0,0 +1,240 @@
/*
*******************************************************************************
* Copyright (C) 2014, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* dictionarydata.h
*
* created on: 2012may31
* created by: Markus W. Scherer & Maxime Serrano
*/
#include "dictionarydata.h"
#include "unicode/ucharstrie.h"
#include "unicode/bytestrie.h"
#include "unicode/udata.h"
#include "cmemory.h"
#if !UCONFIG_NO_BREAK_ITERATION
U_NAMESPACE_BEGIN
const int32_t DictionaryData::TRIE_TYPE_BYTES = 0;
const int32_t DictionaryData::TRIE_TYPE_UCHARS = 1;
const int32_t DictionaryData::TRIE_TYPE_MASK = 7;
const int32_t DictionaryData::TRIE_HAS_VALUES = 8;
const int32_t DictionaryData::TRANSFORM_NONE = 0;
const int32_t DictionaryData::TRANSFORM_TYPE_OFFSET = 0x1000000;
const int32_t DictionaryData::TRANSFORM_TYPE_MASK = 0x7f000000;
const int32_t DictionaryData::TRANSFORM_OFFSET_MASK = 0x1fffff;
DictionaryMatcher::~DictionaryMatcher() {
}
UCharsDictionaryMatcher::~UCharsDictionaryMatcher() {
udata_close(file);
}
int32_t UCharsDictionaryMatcher::getType() const {
return DictionaryData::TRIE_TYPE_UCHARS;
}
int32_t UCharsDictionaryMatcher::matches(UText *text, int32_t maxLength, int32_t limit,
int32_t *lengths, int32_t *cpLengths, int32_t *values,
int32_t *prefix) const {
UCharsTrie uct(characters);
int32_t startingTextIndex = utext_getNativeIndex(text);
int32_t wordCount = 0;
int32_t codePointsMatched = 0;
for (UChar32 c = utext_next32(text); c >= 0; c=utext_next32(text)) {
UStringTrieResult result = (codePointsMatched == 0) ? uct.first(c) : uct.next(c);
int32_t lengthMatched = utext_getNativeIndex(text) - startingTextIndex;
codePointsMatched += 1;
if (USTRINGTRIE_HAS_VALUE(result)) {
if (wordCount < limit) {
if (values != NULL) {
values[wordCount] = uct.getValue();
}
if (lengths != NULL) {
lengths[wordCount] = lengthMatched;
}
if (cpLengths != NULL) {
cpLengths[wordCount] = codePointsMatched;
}
++wordCount;
}
if (result == USTRINGTRIE_FINAL_VALUE) {
break;
}
}
else if (result == USTRINGTRIE_NO_MATCH) {
break;
}
if (lengthMatched >= maxLength) {
break;
}
}
if (prefix != NULL) {
*prefix = codePointsMatched;
}
return wordCount;
}
BytesDictionaryMatcher::~BytesDictionaryMatcher() {
udata_close(file);
}
UChar32 BytesDictionaryMatcher::transform(UChar32 c) const {
if ((transformConstant & DictionaryData::TRANSFORM_TYPE_MASK) == DictionaryData::TRANSFORM_TYPE_OFFSET) {
if (c == 0x200D) {
return 0xFF;
} else if (c == 0x200C) {
return 0xFE;
}
int32_t delta = c - (transformConstant & DictionaryData::TRANSFORM_OFFSET_MASK);
if (delta < 0 || 0xFD < delta) {
return U_SENTINEL;
}
return (UChar32)delta;
}
return c;
}
int32_t BytesDictionaryMatcher::getType() const {
return DictionaryData::TRIE_TYPE_BYTES;
}
int32_t BytesDictionaryMatcher::matches(UText *text, int32_t maxLength, int32_t limit,
int32_t *lengths, int32_t *cpLengths, int32_t *values,
int32_t *prefix) const {
BytesTrie bt(characters);
int32_t startingTextIndex = utext_getNativeIndex(text);
int32_t wordCount = 0;
int32_t codePointsMatched = 0;
for (UChar32 c = utext_next32(text); c >= 0; c=utext_next32(text)) {
UStringTrieResult result = (codePointsMatched == 0) ? bt.first(transform(c)) : bt.next(transform(c));
int32_t lengthMatched = utext_getNativeIndex(text) - startingTextIndex;
codePointsMatched += 1;
if (USTRINGTRIE_HAS_VALUE(result)) {
if (wordCount < limit) {
if (values != NULL) {
values[wordCount] = bt.getValue();
}
if (lengths != NULL) {
lengths[wordCount] = lengthMatched;
}
if (cpLengths != NULL) {
cpLengths[wordCount] = codePointsMatched;
}
++wordCount;
}
if (result == USTRINGTRIE_FINAL_VALUE) {
break;
}
}
else if (result == USTRINGTRIE_NO_MATCH) {
break;
}
if (lengthMatched >= maxLength) {
break;
}
}
if (prefix != NULL) {
*prefix = codePointsMatched;
}
return wordCount;
}
U_NAMESPACE_END
U_NAMESPACE_USE
U_CAPI int32_t U_EXPORT2
udict_swap(const UDataSwapper *ds, const void *inData, int32_t length,
void *outData, UErrorCode *pErrorCode) {
const UDataInfo *pInfo;
int32_t headerSize;
const uint8_t *inBytes;
uint8_t *outBytes;
const int32_t *inIndexes;
int32_t indexes[DictionaryData::IX_COUNT];
int32_t i, offset, size;
headerSize = udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
if (pErrorCode == NULL || U_FAILURE(*pErrorCode)) return 0;
pInfo = (const UDataInfo *)((const char *)inData + 4);
if (!(pInfo->dataFormat[0] == 0x44 &&
pInfo->dataFormat[1] == 0x69 &&
pInfo->dataFormat[2] == 0x63 &&
pInfo->dataFormat[3] == 0x74 &&
pInfo->formatVersion[0] == 1)) {
udata_printError(ds, "udict_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as dictionary data\n",
pInfo->dataFormat[0], pInfo->dataFormat[1], pInfo->dataFormat[2], pInfo->dataFormat[3], pInfo->formatVersion[0]);
*pErrorCode = U_UNSUPPORTED_ERROR;
return 0;
}
inBytes = (const uint8_t *)inData + headerSize;
outBytes = (uint8_t *)outData + headerSize;
inIndexes = (const int32_t *)inBytes;
if (length >= 0) {
length -= headerSize;
if (length < (int32_t)(sizeof(indexes))) {
udata_printError(ds, "udict_swap(): too few bytes (%d after header) for dictionary data\n", length);
*pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
}
}
for (i = 0; i < DictionaryData::IX_COUNT; i++) {
indexes[i] = udata_readInt32(ds, inIndexes[i]);
}
size = indexes[DictionaryData::IX_TOTAL_SIZE];
if (length >= 0) {
if (length < size) {
udata_printError(ds, "udict_swap(): too few bytes (%d after header) for all of dictionary data\n", length);
*pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
}
if (inBytes != outBytes) {
uprv_memcpy(outBytes, inBytes, size);
}
offset = 0;
ds->swapArray32(ds, inBytes, sizeof(indexes), outBytes, pErrorCode);
offset = (int32_t)sizeof(indexes);
int32_t trieType = indexes[DictionaryData::IX_TRIE_TYPE] & DictionaryData::TRIE_TYPE_MASK;
int32_t nextOffset = indexes[DictionaryData::IX_RESERVED1_OFFSET];
if (trieType == DictionaryData::TRIE_TYPE_UCHARS) {
ds->swapArray16(ds, inBytes + offset, nextOffset - offset, outBytes + offset, pErrorCode);
} else if (trieType == DictionaryData::TRIE_TYPE_BYTES) {
// nothing to do
} else {
udata_printError(ds, "udict_swap(): unknown trie type!\n");
*pErrorCode = U_UNSUPPORTED_ERROR;
return 0;
}
// these next two sections are empty in the current format,
// but may be used later.
offset = nextOffset;
nextOffset = indexes[DictionaryData::IX_RESERVED2_OFFSET];
offset = nextOffset;
nextOffset = indexes[DictionaryData::IX_TOTAL_SIZE];
offset = nextOffset;
}
return headerSize + size;
}
#endif

View File

@ -0,0 +1,189 @@
/*
*******************************************************************************
* Copyright (C) 2014, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* dictionarydata.h
*
* created on: 2012may31
* created by: Markus W. Scherer & Maxime Serrano
*/
#ifndef __DICTIONARYDATA_H__
#define __DICTIONARYDATA_H__
#include "unicode/utypes.h"
#if !UCONFIG_NO_BREAK_ITERATION
#include "unicode/utext.h"
#include "unicode/udata.h"
#include "udataswp.h"
#include "unicode/uobject.h"
#include "unicode/ustringtrie.h"
U_NAMESPACE_BEGIN
class UCharsTrie;
class BytesTrie;
class U_COMMON_API DictionaryData : public UMemory {
public:
static const int32_t TRIE_TYPE_BYTES; // = 0;
static const int32_t TRIE_TYPE_UCHARS; // = 1;
static const int32_t TRIE_TYPE_MASK; // = 7;
static const int32_t TRIE_HAS_VALUES; // = 8;
static const int32_t TRANSFORM_NONE; // = 0;
static const int32_t TRANSFORM_TYPE_OFFSET; // = 0x1000000;
static const int32_t TRANSFORM_TYPE_MASK; // = 0x7f000000;
static const int32_t TRANSFORM_OFFSET_MASK; // = 0x1fffff;
enum {
// Byte offsets from the start of the data, after the generic header.
IX_STRING_TRIE_OFFSET,
IX_RESERVED1_OFFSET,
IX_RESERVED2_OFFSET,
IX_TOTAL_SIZE,
// Trie type: TRIE_HAS_VALUES | TRIE_TYPE_BYTES etc.
IX_TRIE_TYPE,
// Transform specification: TRANSFORM_TYPE_OFFSET | 0xe00 etc.
IX_TRANSFORM,
IX_RESERVED6,
IX_RESERVED7,
IX_COUNT
};
};
/**
* Wrapper class around generic dictionaries, implementing matches().
* getType() should return a TRIE_TYPE_??? constant from DictionaryData.
*
* All implementations of this interface must be thread-safe if they are to be used inside of the
* dictionary-based break iteration code.
*/
class U_COMMON_API DictionaryMatcher : public UMemory {
public:
DictionaryMatcher() {};
virtual ~DictionaryMatcher();
// this should emulate CompactTrieDictionary::matches()
/* @param text The text in which to look for matching words. Matching begins
* at the current position of the UText.
* @param maxLength The max length of match to consider. Units are the native indexing
* units of the UText.
* @param limit Capacity of output arrays, which is also the maximum number of
* matching words to be found.
* @param lengths output array, filled with the lengths of the matches, in order,
* from shortest to longest. Lengths are in native indexing units
* of the UText. May be NULL.
* @param cpLengths output array, filled with the lengths of the matches, in order,
* from shortest to longest. Lengths are the number of Unicode code points.
* May be NULL.
* @param values Output array, filled with the values associated with the words found.
* May be NULL.
* @param prefix Output parameter, the code point length of the prefix match, even if that
* prefix didn't lead to a complete word. Will always be >= the cpLength
* of the longest complete word matched. May be NULL.
* @return Number of matching words found.
*/
virtual int32_t matches(UText *text, int32_t maxLength, int32_t limit,
int32_t *lengths, int32_t *cpLengths, int32_t *values,
int32_t *prefix) const = 0;
/** @return DictionaryData::TRIE_TYPE_XYZ */
virtual int32_t getType() const = 0;
};
// Implementation of the DictionaryMatcher interface for a UCharsTrie dictionary
class U_COMMON_API UCharsDictionaryMatcher : public DictionaryMatcher {
public:
// constructs a new UCharsDictionaryMatcher.
// The UDataMemory * will be closed on this object's destruction.
UCharsDictionaryMatcher(const UChar *c, UDataMemory *f) : characters(c), file(f) { }
virtual ~UCharsDictionaryMatcher();
virtual int32_t matches(UText *text, int32_t maxLength, int32_t limit,
int32_t *lengths, int32_t *cpLengths, int32_t *values,
int32_t *prefix) const;
virtual int32_t getType() const;
private:
const UChar *characters;
UDataMemory *file;
};
// Implementation of the DictionaryMatcher interface for a BytesTrie dictionary
class U_COMMON_API BytesDictionaryMatcher : public DictionaryMatcher {
public:
// constructs a new BytesTrieDictionaryMatcher
// the transform constant should be the constant read from the file, not a masked version!
// the UDataMemory * fed in here will be closed on this object's destruction
BytesDictionaryMatcher(const char *c, int32_t t, UDataMemory *f)
: characters(c), transformConstant(t), file(f) { }
virtual ~BytesDictionaryMatcher();
virtual int32_t matches(UText *text, int32_t maxLength, int32_t limit,
int32_t *lengths, int32_t *cpLengths, int32_t *values,
int32_t *prefix) const;
virtual int32_t getType() const;
private:
UChar32 transform(UChar32 c) const;
const char *characters;
int32_t transformConstant;
UDataMemory *file;
};
U_NAMESPACE_END
U_CAPI int32_t U_EXPORT2
udict_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outData, UErrorCode *pErrorCode);
/**
* Format of dictionary .dict data files.
* Format version 1.0.
*
* A dictionary .dict data file contains a byte-serialized BytesTrie or
* a UChars-serialized UCharsTrie.
* Such files are used in dictionary-based break iteration (DBBI).
*
* For a BytesTrie, a transformation type is specified for
* transforming Unicode strings into byte sequences.
*
* A .dict file begins with a standard ICU data file header
* (DataHeader, see ucmndata.h and unicode/udata.h).
* The UDataInfo.dataVersion field is currently unused (set to 0.0.0.0).
*
* After the header, the file contains the following parts.
* Constants are defined in the DictionaryData class.
*
* For the data structure of BytesTrie & UCharsTrie see
* http://site.icu-project.org/design/struct/tries
* and the bytestrie.h and ucharstrie.h header files.
*
* int32_t indexes[indexesLength]; -- indexesLength=indexes[IX_STRING_TRIE_OFFSET]/4;
*
* The first four indexes are byte offsets in ascending order.
* Each byte offset marks the start of the next part in the data file,
* and the end of the previous one.
* When two consecutive byte offsets are the same, then the corresponding part is empty.
* Byte offsets are offsets from after the header,
* that is, from the beginning of the indexes[].
* Each part starts at an offset with proper alignment for its data.
* If necessary, the previous part may include padding bytes to achieve this alignment.
*
* trieType=indexes[IX_TRIE_TYPE] defines the trie type.
* transform=indexes[IX_TRANSFORM] defines the Unicode-to-bytes transformation.
* If the transformation type is TRANSFORM_TYPE_OFFSET,
* then the lower 21 bits contain the offset code point.
* Each code point c is mapped to byte b = (c - offset).
* Code points outside the range offset..(offset+0xff) cannot be mapped
* and do not occur in the dictionary.
*
* stringTrie; -- a serialized BytesTrie or UCharsTrie
*
* The dictionary maps strings to specific values (TRIE_HAS_VALUES bit set in trieType),
* or it maps all strings to 0 (TRIE_HAS_VALUES bit not set).
*/
#endif /* !UCONFIG_NO_BREAK_ITERATION */
#endif /* __DICTIONARYDATA_H__ */

View File

@ -0,0 +1,61 @@
/*******************************************************************************
* Copyright (C) 2008, International Business Machines Corporation and
* others. All Rights Reserved.
*******************************************************************************
*
* File DTINTRV.CPP
*
*******************************************************************************
*/
#include "unicode/dtintrv.h"
U_NAMESPACE_BEGIN
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(DateInterval)
//DateInterval::DateInterval(){}
DateInterval::DateInterval(UDate from, UDate to)
: fromDate(from),
toDate(to)
{}
DateInterval::~DateInterval(){}
DateInterval::DateInterval(const DateInterval& other)
: UObject(other) {
*this = other;
}
DateInterval&
DateInterval::operator=(const DateInterval& other) {
if ( this != &other ) {
fromDate = other.fromDate;
toDate = other.toDate;
}
return *this;
}
DateInterval*
DateInterval::clone() const {
return new DateInterval(*this);
}
UBool
DateInterval::operator==(const DateInterval& other) const {
return ( fromDate == other.fromDate && toDate == other.toDate );
}
U_NAMESPACE_END

View File

@ -0,0 +1,40 @@
/*
*******************************************************************************
*
* Copyright (C) 2009-2011, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: errorcode.cpp
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2009mar10
* created by: Markus W. Scherer
*/
#include "unicode/utypes.h"
#include "unicode/errorcode.h"
U_NAMESPACE_BEGIN
ErrorCode::~ErrorCode() {}
UErrorCode ErrorCode::reset() {
UErrorCode code = errorCode;
errorCode = U_ZERO_ERROR;
return code;
}
void ErrorCode::assertSuccess() const {
if(isFailure()) {
handleFailure();
}
}
const char* ErrorCode::errorName() const {
return u_errorName(errorCode);
}
U_NAMESPACE_END

View File

@ -0,0 +1,671 @@
/*
*******************************************************************************
* Copyright (C) 2014-2015, International Business Machines Corporation and
* others. All Rights Reserved.
*******************************************************************************
*/
#include "unicode/utypes.h"
#if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION
#include "cmemory.h"
#include "unicode/filteredbrk.h"
#include "unicode/ucharstriebuilder.h"
#include "unicode/ures.h"
#include "uresimp.h" // ures_getByKeyWithFallback
#include "ubrkimpl.h" // U_ICUDATA_BRKITR
#include "uvector.h"
#include "cmemory.h"
U_NAMESPACE_BEGIN
#ifndef FB_DEBUG
#define FB_DEBUG 0
#endif
#if FB_DEBUG
#include <stdio.h>
static void _fb_trace(const char *m, const UnicodeString *s, UBool b, int32_t d, const char *f, int l) {
char buf[2048];
if(s) {
s->extract(0,s->length(),buf,2048);
} else {
strcpy(buf,"NULL");
}
fprintf(stderr,"%s:%d: %s. s='%s'(%p), b=%c, d=%d\n",
f, l, m, buf, (const void*)s, b?'T':'F',(int)d);
}
#define FB_TRACE(m,s,b,d) _fb_trace(m,s,b,d,__FILE__,__LINE__)
#else
#define FB_TRACE(m,s,b,d)
#endif
/**
* Used with sortedInsert()
*/
static int8_t U_CALLCONV compareUnicodeString(UElement t1, UElement t2) {
const UnicodeString &a = *(const UnicodeString*)t1.pointer;
const UnicodeString &b = *(const UnicodeString*)t2.pointer;
return a.compare(b);
}
/**
* A UVector which implements a set of strings.
*/
class U_COMMON_API UStringSet : public UVector {
public:
UStringSet(UErrorCode &status) : UVector(uprv_deleteUObject,
uhash_compareUnicodeString,
1,
status) {}
virtual ~UStringSet();
/**
* Is this UnicodeSet contained?
*/
inline UBool contains(const UnicodeString& s) {
return contains((void*) &s);
}
using UVector::contains;
/**
* Return the ith UnicodeString alias
*/
inline const UnicodeString* getStringAt(int32_t i) const {
return (const UnicodeString*)elementAt(i);
}
/**
* Adopt the UnicodeString if not already contained.
* Caller no longer owns the pointer in any case.
* @return true if adopted successfully, false otherwise (error, or else duplicate)
*/
inline UBool adopt(UnicodeString *str, UErrorCode &status) {
if(U_FAILURE(status) || contains(*str)) {
delete str;
return false;
} else {
sortedInsert(str, compareUnicodeString, status);
if(U_FAILURE(status)) {
delete str;
return false;
}
return true;
}
}
/**
* Add by value.
* @return true if successfully adopted.
*/
inline UBool add(const UnicodeString& str, UErrorCode &status) {
if(U_FAILURE(status)) return false;
UnicodeString *t = new UnicodeString(str);
if(t==NULL) {
status = U_MEMORY_ALLOCATION_ERROR; return false;
}
return adopt(t, status);
}
/**
* Remove this string.
* @return true if successfully removed, false otherwise (error, or else it wasn't there)
*/
inline UBool remove(const UnicodeString &s, UErrorCode &status) {
if(U_FAILURE(status)) return false;
return removeElement((void*) &s);
}
};
/**
* Virtual, won't be inlined
*/
UStringSet::~UStringSet() {}
/* ----------------------------------------------------------- */
/* Filtered Break constants */
static const int32_t kPARTIAL = (1<<0); //< partial - need to run through forward trie
static const int32_t kMATCH = (1<<1); //< exact match - skip this one.
static const int32_t kSuppressInReverse = (1<<0);
static const int32_t kAddToForward = (1<<1);
static const UChar kFULLSTOP = 0x002E; // '.'
/**
* Shared data for SimpleFilteredSentenceBreakIterator
*/
class SimpleFilteredSentenceBreakData : public UMemory {
public:
SimpleFilteredSentenceBreakData(UCharsTrie *forwards, UCharsTrie *backwards )
: fForwardsPartialTrie(forwards), fBackwardsTrie(backwards), refcount(1) { }
SimpleFilteredSentenceBreakData *incr() { refcount++; return this; }
SimpleFilteredSentenceBreakData *decr() { if((--refcount) <= 0) delete this; return 0; }
virtual ~SimpleFilteredSentenceBreakData();
LocalPointer<UCharsTrie> fForwardsPartialTrie; // Has ".a" for "a.M."
LocalPointer<UCharsTrie> fBackwardsTrie; // i.e. ".srM" for Mrs.
int32_t refcount;
};
SimpleFilteredSentenceBreakData::~SimpleFilteredSentenceBreakData() {}
/**
* Concrete implementation
*/
class SimpleFilteredSentenceBreakIterator : public BreakIterator {
public:
SimpleFilteredSentenceBreakIterator(BreakIterator *adopt, UCharsTrie *forwards, UCharsTrie *backwards, UErrorCode &status);
SimpleFilteredSentenceBreakIterator(const SimpleFilteredSentenceBreakIterator& other);
virtual ~SimpleFilteredSentenceBreakIterator();
private:
SimpleFilteredSentenceBreakData *fData;
LocalPointer<BreakIterator> fDelegate;
LocalUTextPointer fText;
/* -- subclass interface -- */
public:
/* -- cloning and other subclass stuff -- */
virtual BreakIterator * createBufferClone(void * /*stackBuffer*/,
int32_t &/*BufferSize*/,
UErrorCode &status) {
// for now - always deep clone
status = U_SAFECLONE_ALLOCATED_WARNING;
return clone();
}
virtual BreakIterator* clone(void) const { return new SimpleFilteredSentenceBreakIterator(*this); }
virtual UClassID getDynamicClassID(void) const { return NULL; }
virtual UBool operator==(const BreakIterator& o) const { if(this==&o) return true; return false; }
/* -- text modifying -- */
virtual void setText(UText *text, UErrorCode &status) { fDelegate->setText(text,status); }
virtual BreakIterator &refreshInputText(UText *input, UErrorCode &status) { fDelegate->refreshInputText(input,status); return *this; }
virtual void adoptText(CharacterIterator* it) { fDelegate->adoptText(it); }
virtual void setText(const UnicodeString &text) { fDelegate->setText(text); }
/* -- other functions that are just delegated -- */
virtual UText *getUText(UText *fillIn, UErrorCode &status) const { return fDelegate->getUText(fillIn,status); }
virtual CharacterIterator& getText(void) const { return fDelegate->getText(); }
/* -- ITERATION -- */
virtual int32_t first(void);
virtual int32_t preceding(int32_t offset);
virtual int32_t previous(void);
virtual UBool isBoundary(int32_t offset);
virtual int32_t current(void) const { return fDelegate->current(); } // we keep the delegate current, so this should be correct.
virtual int32_t next(void);
virtual int32_t next(int32_t n);
virtual int32_t following(int32_t offset);
virtual int32_t last(void);
private:
/**
* Given that the fDelegate has already given its "initial" answer,
* find the NEXT actual (non-excepted) break.
* @param n initial position from delegate
* @return new break position or UBRK_DONE
*/
int32_t internalNext(int32_t n);
/**
* Given that the fDelegate has already given its "initial" answer,
* find the PREV actual (non-excepted) break.
* @param n initial position from delegate
* @return new break position or UBRK_DONE
*/
int32_t internalPrev(int32_t n);
/**
* set up the UText with the value of the fDelegate.
* Call this before calling breakExceptionAt.
* May be able to avoid excess calls
*/
void resetState(UErrorCode &status);
/**
* Is there a match (exception) at this spot?
*/
enum EFBMatchResult { kNoExceptionHere, kExceptionHere };
/**
* Determine if there is an exception at this spot
* @param n spot to check
* @return kNoExceptionHere or kExceptionHere
**/
enum EFBMatchResult breakExceptionAt(int32_t n);
};
SimpleFilteredSentenceBreakIterator::SimpleFilteredSentenceBreakIterator(const SimpleFilteredSentenceBreakIterator& other)
: BreakIterator(other), fData(other.fData->incr()), fDelegate(other.fDelegate->clone())
{
}
SimpleFilteredSentenceBreakIterator::SimpleFilteredSentenceBreakIterator(BreakIterator *adopt, UCharsTrie *forwards, UCharsTrie *backwards, UErrorCode &status) :
BreakIterator(adopt->getLocale(ULOC_VALID_LOCALE,status),adopt->getLocale(ULOC_ACTUAL_LOCALE,status)),
fData(new SimpleFilteredSentenceBreakData(forwards, backwards)),
fDelegate(adopt)
{
// all set..
}
SimpleFilteredSentenceBreakIterator::~SimpleFilteredSentenceBreakIterator() {
fData = fData->decr();
}
void SimpleFilteredSentenceBreakIterator::resetState(UErrorCode &status) {
fText.adoptInstead(fDelegate->getUText(fText.orphan(), status));
}
SimpleFilteredSentenceBreakIterator::EFBMatchResult
SimpleFilteredSentenceBreakIterator::breakExceptionAt(int32_t n) {
int64_t bestPosn = -1;
int32_t bestValue = -1;
// loops while 'n' points to an exception.
utext_setNativeIndex(fText.getAlias(), n); // from n..
fData->fBackwardsTrie->reset();
UChar32 uch;
//if(debug2) u_printf(" n@ %d\n", n);
// Assume a space is following the '.' (so we handle the case: "Mr. /Brown")
if((uch=utext_previous32(fText.getAlias()))==(UChar32)0x0020) { // TODO: skip a class of chars here??
// TODO only do this the 1st time?
//if(debug2) u_printf("skipping prev: |%C| \n", (UChar)uch);
} else {
//if(debug2) u_printf("not skipping prev: |%C| \n", (UChar)uch);
uch = utext_next32(fText.getAlias());
//if(debug2) u_printf(" -> : |%C| \n", (UChar)uch);
}
UStringTrieResult r = USTRINGTRIE_INTERMEDIATE_VALUE;
while((uch=utext_previous32(fText.getAlias()))!=U_SENTINEL && // more to consume backwards and..
USTRINGTRIE_HAS_NEXT(r=fData->fBackwardsTrie->nextForCodePoint(uch))) {// more in the trie
if(USTRINGTRIE_HAS_VALUE(r)) { // remember the best match so far
bestPosn = utext_getNativeIndex(fText.getAlias());
bestValue = fData->fBackwardsTrie->getValue();
}
//if(debug2) u_printf("rev< /%C/ cont?%d @%d\n", (UChar)uch, r, utext_getNativeIndex(fText.getAlias()));
}
if(USTRINGTRIE_MATCHES(r)) { // exact match?
//if(debug2) u_printf("rev<?/%C/?end of seq.. r=%d, bestPosn=%d, bestValue=%d\n", (UChar)uch, r, bestPosn, bestValue);
bestValue = fData->fBackwardsTrie->getValue();
bestPosn = utext_getNativeIndex(fText.getAlias());
//if(debug2) u_printf("rev<+/%C/+end of seq.. r=%d, bestPosn=%d, bestValue=%d\n", (UChar)uch, r, bestPosn, bestValue);
}
if(bestPosn>=0) {
//if(debug2) u_printf("rev< /%C/ end of seq.. r=%d, bestPosn=%d, bestValue=%d\n", (UChar)uch, r, bestPosn, bestValue);
//if(USTRINGTRIE_MATCHES(r)) { // matched - so, now what?
//int32_t bestValue = fBackwardsTrie->getValue();
////if(debug2) u_printf("rev< /%C/ matched, skip..%d bestValue=%d\n", (UChar)uch, r, bestValue);
if(bestValue == kMATCH) { // exact match!
//if(debug2) u_printf(" exact backward match\n");
return kExceptionHere; // See if the next is another exception.
} else if(bestValue == kPARTIAL
&& fData->fForwardsPartialTrie.isValid()) { // make sure there's a forward trie
//if(debug2) u_printf(" partial backward match\n");
// We matched the "Ph." in "Ph.D." - now we need to run everything through the forwards trie
// to see if it matches something going forward.
fData->fForwardsPartialTrie->reset();
UStringTrieResult rfwd = USTRINGTRIE_INTERMEDIATE_VALUE;
utext_setNativeIndex(fText.getAlias(), bestPosn); // hope that's close ..
//if(debug2) u_printf("Retrying at %d\n", bestPosn);
while((uch=utext_next32(fText.getAlias()))!=U_SENTINEL &&
USTRINGTRIE_HAS_NEXT(rfwd=fData->fForwardsPartialTrie->nextForCodePoint(uch))) {
//if(debug2) u_printf("fwd> /%C/ cont?%d @%d\n", (UChar)uch, rfwd, utext_getNativeIndex(fText.getAlias()));
}
if(USTRINGTRIE_MATCHES(rfwd)) {
//if(debug2) u_printf("fwd> /%C/ == forward match!\n", (UChar)uch);
// only full matches here, nothing to check
// skip the next:
return kExceptionHere;
} else {
//if(debug2) u_printf("fwd> /%C/ no match.\n", (UChar)uch);
// no match (no exception) -return the 'underlying' break
return kNoExceptionHere;
}
} else {
return kNoExceptionHere; // internal error and/or no forwards trie
}
} else {
//if(debug2) u_printf("rev< /%C/ .. no match..%d\n", (UChar)uch, r); // no best match
return kNoExceptionHere; // No match - so exit. Not an exception.
}
}
// the workhorse single next.
int32_t
SimpleFilteredSentenceBreakIterator::internalNext(int32_t n) {
if(n == UBRK_DONE || // at end or
fData->fBackwardsTrie.isNull()) { // .. no backwards table loaded == no exceptions
return n;
}
// OK, do we need to break here?
UErrorCode status = U_ZERO_ERROR;
// refresh text
resetState(status);
if(U_FAILURE(status)) return UBRK_DONE; // bail out
int64_t utextLen = utext_nativeLength(fText.getAlias());
//if(debug2) u_printf("str, native len=%d\n", utext_nativeLength(fText.getAlias()));
while (n != UBRK_DONE && n != utextLen) { // outer loop runs once per underlying break (from fDelegate).
SimpleFilteredSentenceBreakIterator::EFBMatchResult m = breakExceptionAt(n);
switch(m) {
case kExceptionHere:
n = fDelegate->next(); // skip this one. Find the next lowerlevel break.
continue;
default:
case kNoExceptionHere:
return n;
}
}
return n;
}
int32_t
SimpleFilteredSentenceBreakIterator::internalPrev(int32_t n) {
if(n == 0 || n == UBRK_DONE || // at end or
fData->fBackwardsTrie.isNull()) { // .. no backwards table loaded == no exceptions
return n;
}
// OK, do we need to break here?
UErrorCode status = U_ZERO_ERROR;
// refresh text
resetState(status);
if(U_FAILURE(status)) return UBRK_DONE; // bail out
//if(debug2) u_printf("str, native len=%d\n", utext_nativeLength(fText.getAlias()));
while (n != UBRK_DONE && n != 0) { // outer loop runs once per underlying break (from fDelegate).
SimpleFilteredSentenceBreakIterator::EFBMatchResult m = breakExceptionAt(n);
switch(m) {
case kExceptionHere:
n = fDelegate->previous(); // skip this one. Find the next lowerlevel break.
continue;
default:
case kNoExceptionHere:
return n;
}
}
return n;
}
int32_t
SimpleFilteredSentenceBreakIterator::next() {
return internalNext(fDelegate->next());
}
int32_t
SimpleFilteredSentenceBreakIterator::first(void) {
return internalNext(fDelegate->first());
}
int32_t
SimpleFilteredSentenceBreakIterator::preceding(int32_t offset) {
return internalPrev(fDelegate->preceding(offset));
}
int32_t
SimpleFilteredSentenceBreakIterator::previous(void) {
return internalPrev(fDelegate->previous());
}
UBool SimpleFilteredSentenceBreakIterator::isBoundary(int32_t offset) {
if(!fDelegate->isBoundary(offset)) return false; // no break to suppress
UErrorCode status = U_ZERO_ERROR;
resetState(status);
SimpleFilteredSentenceBreakIterator::EFBMatchResult m = breakExceptionAt(offset);
switch(m) {
case kExceptionHere:
return false;
default:
case kNoExceptionHere:
return true;
}
}
int32_t
SimpleFilteredSentenceBreakIterator::next(int32_t offset) {
return internalNext(fDelegate->next(offset));
}
int32_t
SimpleFilteredSentenceBreakIterator::following(int32_t offset) {
return internalNext(fDelegate->following(offset));
}
int32_t
SimpleFilteredSentenceBreakIterator::last(void) {
// Don't suppress a break opportunity at the end of text.
return fDelegate->last();
}
/**
* Concrete implementation of builder class.
*/
class U_COMMON_API SimpleFilteredBreakIteratorBuilder : public FilteredBreakIteratorBuilder {
public:
virtual ~SimpleFilteredBreakIteratorBuilder();
SimpleFilteredBreakIteratorBuilder(const Locale &fromLocale, UErrorCode &status);
SimpleFilteredBreakIteratorBuilder(UErrorCode &status);
virtual UBool suppressBreakAfter(const UnicodeString& exception, UErrorCode& status);
virtual UBool unsuppressBreakAfter(const UnicodeString& exception, UErrorCode& status);
virtual BreakIterator *build(BreakIterator* adoptBreakIterator, UErrorCode& status);
private:
UStringSet fSet;
};
SimpleFilteredBreakIteratorBuilder::~SimpleFilteredBreakIteratorBuilder()
{
}
SimpleFilteredBreakIteratorBuilder::SimpleFilteredBreakIteratorBuilder(UErrorCode &status)
: fSet(status)
{
}
SimpleFilteredBreakIteratorBuilder::SimpleFilteredBreakIteratorBuilder(const Locale &fromLocale, UErrorCode &status)
: fSet(status)
{
if(U_SUCCESS(status)) {
LocalUResourceBundlePointer b(ures_open(U_ICUDATA_BRKITR, fromLocale.getBaseName(), &status));
LocalUResourceBundlePointer exceptions(ures_getByKeyWithFallback(b.getAlias(), "exceptions", NULL, &status));
LocalUResourceBundlePointer breaks(ures_getByKeyWithFallback(exceptions.getAlias(), "SentenceBreak", NULL, &status));
if(U_FAILURE(status)) return; // leaves the builder empty, if you try to use it.
LocalUResourceBundlePointer strs;
UErrorCode subStatus = status;
do {
strs.adoptInstead(ures_getNextResource(breaks.getAlias(), strs.orphan(), &subStatus));
if(strs.isValid() && U_SUCCESS(subStatus)) {
UnicodeString str(ures_getUnicodeString(strs.getAlias(), &status));
suppressBreakAfter(str, status); // load the string
}
} while (strs.isValid() && U_SUCCESS(subStatus));
if(U_FAILURE(subStatus)&&subStatus!=U_INDEX_OUTOFBOUNDS_ERROR&&U_SUCCESS(status)) {
status = subStatus;
}
}
}
UBool
SimpleFilteredBreakIteratorBuilder::suppressBreakAfter(const UnicodeString& exception, UErrorCode& status)
{
UBool r = fSet.add(exception, status);
FB_TRACE("suppressBreakAfter",&exception,r,0);
return r;
}
UBool
SimpleFilteredBreakIteratorBuilder::unsuppressBreakAfter(const UnicodeString& exception, UErrorCode& status)
{
UBool r = fSet.remove(exception, status);
FB_TRACE("unsuppressBreakAfter",&exception,r,0);
return r;
}
/**
* Jitterbug 2974: MSVC has a bug whereby new X[0] behaves badly.
* Work around this.
*
* Note: "new UnicodeString[subCount]" ends up calling global operator new
* on MSVC2012 for some reason.
*/
static inline UnicodeString* newUnicodeStringArray(size_t count) {
return new UnicodeString[count ? count : 1];
}
BreakIterator *
SimpleFilteredBreakIteratorBuilder::build(BreakIterator* adoptBreakIterator, UErrorCode& status) {
LocalPointer<BreakIterator> adopt(adoptBreakIterator);
LocalPointer<UCharsTrieBuilder> builder(new UCharsTrieBuilder(status), status);
LocalPointer<UCharsTrieBuilder> builder2(new UCharsTrieBuilder(status), status);
if(U_FAILURE(status)) {
return NULL;
}
int32_t revCount = 0;
int32_t fwdCount = 0;
int32_t subCount = fSet.size();
UnicodeString *ustrs_ptr = newUnicodeStringArray(subCount);
LocalArray<UnicodeString> ustrs(ustrs_ptr);
LocalMemory<int> partials;
partials.allocateInsteadAndReset(subCount);
LocalPointer<UCharsTrie> backwardsTrie; // i.e. ".srM" for Mrs.
LocalPointer<UCharsTrie> forwardsPartialTrie; // Has ".a" for "a.M."
int n=0;
for ( int32_t i = 0;
i<fSet.size();
i++) {
const UnicodeString *abbr = fSet.getStringAt(i);
if(abbr) {
FB_TRACE("build",abbr,TRUE,i);
ustrs[n] = *abbr; // copy by value
FB_TRACE("ustrs[n]",&ustrs[n],TRUE,i);
} else {
FB_TRACE("build",abbr,FALSE,i);
status = U_MEMORY_ALLOCATION_ERROR;
return NULL;
}
partials[n] = 0; // default: not partial
n++;
}
// first pass - find partials.
for(int i=0;i<subCount;i++) {
int nn = ustrs[i].indexOf(kFULLSTOP); // TODO: non-'.' abbreviations
if(nn>-1 && (nn+1)!=ustrs[i].length()) {
FB_TRACE("partial",&ustrs[i],FALSE,i);
// is partial.
// is it unique?
int sameAs = -1;
for(int j=0;j<subCount;j++) {
if(j==i) continue;
if(ustrs[i].compare(0,nn+1,ustrs[j],0,nn+1)==0) {
FB_TRACE("prefix",&ustrs[j],FALSE,nn+1);
//UBool otherIsPartial = ((nn+1)!=ustrs[j].length()); // true if ustrs[j] doesn't end at nn
if(partials[j]==0) { // hasn't been processed yet
partials[j] = kSuppressInReverse | kAddToForward;
FB_TRACE("suppressing",&ustrs[j],FALSE,j);
} else if(partials[j] & kSuppressInReverse) {
sameAs = j; // the other entry is already in the reverse table.
}
}
}
FB_TRACE("for partial same-",&ustrs[i],FALSE,sameAs);
FB_TRACE(" == partial #",&ustrs[i],FALSE,partials[i]);
UnicodeString prefix(ustrs[i], 0, nn+1);
if(sameAs == -1 && partials[i] == 0) {
// first one - add the prefix to the reverse table.
prefix.reverse();
builder->add(prefix, kPARTIAL, status);
revCount++;
FB_TRACE("Added partial",&prefix,FALSE, i);
FB_TRACE(u_errorName(status),&ustrs[i],FALSE,i);
partials[i] = kSuppressInReverse | kAddToForward;
} else {
FB_TRACE("NOT adding partial",&prefix,FALSE, i);
FB_TRACE(u_errorName(status),&ustrs[i],FALSE,i);
}
}
}
for(int i=0;i<subCount;i++) {
if(partials[i]==0) {
ustrs[i].reverse();
builder->add(ustrs[i], kMATCH, status);
revCount++;
FB_TRACE(u_errorName(status), &ustrs[i], FALSE, i);
} else {
FB_TRACE("Adding fwd",&ustrs[i], FALSE, i);
// an optimization would be to only add the portion after the '.'
// for example, for "Ph.D." we store ".hP" in the reverse table. We could just store "D." in the forward,
// instead of "Ph.D." since we already know the "Ph." part is a match.
// would need the trie to be able to hold 0-length strings, though.
builder2->add(ustrs[i], kMATCH, status); // forward
fwdCount++;
//ustrs[i].reverse();
////if(debug2) u_printf("SUPPRESS- not Added(%d): /%S/ status=%s\n",partials[i], ustrs[i].getTerminatedBuffer(), u_errorName(status));
}
}
FB_TRACE("AbbrCount",NULL,FALSE, subCount);
if(revCount>0) {
backwardsTrie.adoptInstead(builder->build(USTRINGTRIE_BUILD_FAST, status));
if(U_FAILURE(status)) {
FB_TRACE(u_errorName(status),NULL,FALSE, -1);
return NULL;
}
}
if(fwdCount>0) {
forwardsPartialTrie.adoptInstead(builder2->build(USTRINGTRIE_BUILD_FAST, status));
if(U_FAILURE(status)) {
FB_TRACE(u_errorName(status),NULL,FALSE, -1);
return NULL;
}
}
return new SimpleFilteredSentenceBreakIterator(adopt.orphan(), forwardsPartialTrie.orphan(), backwardsTrie.orphan(), status);
}
// ----------- Base class implementation
FilteredBreakIteratorBuilder::FilteredBreakIteratorBuilder() {
}
FilteredBreakIteratorBuilder::~FilteredBreakIteratorBuilder() {
}
FilteredBreakIteratorBuilder *
FilteredBreakIteratorBuilder::createInstance(const Locale& where, UErrorCode& status) {
if(U_FAILURE(status)) return NULL;
LocalPointer<FilteredBreakIteratorBuilder> ret(new SimpleFilteredBreakIteratorBuilder(where, status), status);
return (U_SUCCESS(status))? ret.orphan(): NULL;
}
FilteredBreakIteratorBuilder *
FilteredBreakIteratorBuilder::createInstance(UErrorCode& status) {
if(U_FAILURE(status)) return NULL;
LocalPointer<FilteredBreakIteratorBuilder> ret(new SimpleFilteredBreakIteratorBuilder(status), status);
return (U_SUCCESS(status))? ret.orphan(): NULL;
}
U_NAMESPACE_END
#endif //#if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING && !UCONFIG_NO_FILTERED_BREAK_ITERATION

View File

@ -0,0 +1,288 @@
/*
*******************************************************************************
*
* Copyright (C) 2009-2012, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: filterednormalizer2.cpp
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2009dec10
* created by: Markus W. Scherer
*/
#include "unicode/utypes.h"
#if !UCONFIG_NO_NORMALIZATION
#include "unicode/normalizer2.h"
#include "unicode/uniset.h"
#include "unicode/unistr.h"
#include "unicode/unorm.h"
#include "cpputils.h"
U_NAMESPACE_BEGIN
FilteredNormalizer2::~FilteredNormalizer2() {}
UnicodeString &
FilteredNormalizer2::normalize(const UnicodeString &src,
UnicodeString &dest,
UErrorCode &errorCode) const {
uprv_checkCanGetBuffer(src, errorCode);
if(U_FAILURE(errorCode)) {
dest.setToBogus();
return dest;
}
if(&dest==&src) {
errorCode=U_ILLEGAL_ARGUMENT_ERROR;
return dest;
}
dest.remove();
return normalize(src, dest, USET_SPAN_SIMPLE, errorCode);
}
// Internal: No argument checking, and appends to dest.
// Pass as input spanCondition the one that is likely to yield a non-zero
// span length at the start of src.
// For set=[:age=3.2:], since almost all common characters were in Unicode 3.2,
// USET_SPAN_SIMPLE should be passed in for the start of src
// and USET_SPAN_NOT_CONTAINED should be passed in if we continue after
// an in-filter prefix.
UnicodeString &
FilteredNormalizer2::normalize(const UnicodeString &src,
UnicodeString &dest,
USetSpanCondition spanCondition,
UErrorCode &errorCode) const {
UnicodeString tempDest; // Don't throw away destination buffer between iterations.
for(int32_t prevSpanLimit=0; prevSpanLimit<src.length();) {
int32_t spanLimit=set.span(src, prevSpanLimit, spanCondition);
int32_t spanLength=spanLimit-prevSpanLimit;
if(spanCondition==USET_SPAN_NOT_CONTAINED) {
if(spanLength!=0) {
dest.append(src, prevSpanLimit, spanLength);
}
spanCondition=USET_SPAN_SIMPLE;
} else {
if(spanLength!=0) {
// Not norm2.normalizeSecondAndAppend() because we do not want
// to modify the non-filter part of dest.
dest.append(norm2.normalize(src.tempSubStringBetween(prevSpanLimit, spanLimit),
tempDest, errorCode));
if(U_FAILURE(errorCode)) {
break;
}
}
spanCondition=USET_SPAN_NOT_CONTAINED;
}
prevSpanLimit=spanLimit;
}
return dest;
}
UnicodeString &
FilteredNormalizer2::normalizeSecondAndAppend(UnicodeString &first,
const UnicodeString &second,
UErrorCode &errorCode) const {
return normalizeSecondAndAppend(first, second, TRUE, errorCode);
}
UnicodeString &
FilteredNormalizer2::append(UnicodeString &first,
const UnicodeString &second,
UErrorCode &errorCode) const {
return normalizeSecondAndAppend(first, second, FALSE, errorCode);
}
UnicodeString &
FilteredNormalizer2::normalizeSecondAndAppend(UnicodeString &first,
const UnicodeString &second,
UBool doNormalize,
UErrorCode &errorCode) const {
uprv_checkCanGetBuffer(first, errorCode);
uprv_checkCanGetBuffer(second, errorCode);
if(U_FAILURE(errorCode)) {
return first;
}
if(&first==&second) {
errorCode=U_ILLEGAL_ARGUMENT_ERROR;
return first;
}
if(first.isEmpty()) {
if(doNormalize) {
return normalize(second, first, errorCode);
} else {
return first=second;
}
}
// merge the in-filter suffix of the first string with the in-filter prefix of the second
int32_t prefixLimit=set.span(second, 0, USET_SPAN_SIMPLE);
if(prefixLimit!=0) {
UnicodeString prefix(second.tempSubString(0, prefixLimit));
int32_t suffixStart=set.spanBack(first, INT32_MAX, USET_SPAN_SIMPLE);
if(suffixStart==0) {
if(doNormalize) {
norm2.normalizeSecondAndAppend(first, prefix, errorCode);
} else {
norm2.append(first, prefix, errorCode);
}
} else {
UnicodeString middle(first, suffixStart, INT32_MAX);
if(doNormalize) {
norm2.normalizeSecondAndAppend(middle, prefix, errorCode);
} else {
norm2.append(middle, prefix, errorCode);
}
first.replace(suffixStart, INT32_MAX, middle);
}
}
if(prefixLimit<second.length()) {
UnicodeString rest(second.tempSubString(prefixLimit, INT32_MAX));
if(doNormalize) {
normalize(rest, first, USET_SPAN_NOT_CONTAINED, errorCode);
} else {
first.append(rest);
}
}
return first;
}
UBool
FilteredNormalizer2::getDecomposition(UChar32 c, UnicodeString &decomposition) const {
return set.contains(c) && norm2.getDecomposition(c, decomposition);
}
UBool
FilteredNormalizer2::getRawDecomposition(UChar32 c, UnicodeString &decomposition) const {
return set.contains(c) && norm2.getRawDecomposition(c, decomposition);
}
UChar32
FilteredNormalizer2::composePair(UChar32 a, UChar32 b) const {
return (set.contains(a) && set.contains(b)) ? norm2.composePair(a, b) : U_SENTINEL;
}
uint8_t
FilteredNormalizer2::getCombiningClass(UChar32 c) const {
return set.contains(c) ? norm2.getCombiningClass(c) : 0;
}
UBool
FilteredNormalizer2::isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
uprv_checkCanGetBuffer(s, errorCode);
if(U_FAILURE(errorCode)) {
return FALSE;
}
USetSpanCondition spanCondition=USET_SPAN_SIMPLE;
for(int32_t prevSpanLimit=0; prevSpanLimit<s.length();) {
int32_t spanLimit=set.span(s, prevSpanLimit, spanCondition);
if(spanCondition==USET_SPAN_NOT_CONTAINED) {
spanCondition=USET_SPAN_SIMPLE;
} else {
if( !norm2.isNormalized(s.tempSubStringBetween(prevSpanLimit, spanLimit), errorCode) ||
U_FAILURE(errorCode)
) {
return FALSE;
}
spanCondition=USET_SPAN_NOT_CONTAINED;
}
prevSpanLimit=spanLimit;
}
return TRUE;
}
UNormalizationCheckResult
FilteredNormalizer2::quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
uprv_checkCanGetBuffer(s, errorCode);
if(U_FAILURE(errorCode)) {
return UNORM_MAYBE;
}
UNormalizationCheckResult result=UNORM_YES;
USetSpanCondition spanCondition=USET_SPAN_SIMPLE;
for(int32_t prevSpanLimit=0; prevSpanLimit<s.length();) {
int32_t spanLimit=set.span(s, prevSpanLimit, spanCondition);
if(spanCondition==USET_SPAN_NOT_CONTAINED) {
spanCondition=USET_SPAN_SIMPLE;
} else {
UNormalizationCheckResult qcResult=
norm2.quickCheck(s.tempSubStringBetween(prevSpanLimit, spanLimit), errorCode);
if(U_FAILURE(errorCode) || qcResult==UNORM_NO) {
return qcResult;
} else if(qcResult==UNORM_MAYBE) {
result=qcResult;
}
spanCondition=USET_SPAN_NOT_CONTAINED;
}
prevSpanLimit=spanLimit;
}
return result;
}
int32_t
FilteredNormalizer2::spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const {
uprv_checkCanGetBuffer(s, errorCode);
if(U_FAILURE(errorCode)) {
return 0;
}
USetSpanCondition spanCondition=USET_SPAN_SIMPLE;
for(int32_t prevSpanLimit=0; prevSpanLimit<s.length();) {
int32_t spanLimit=set.span(s, prevSpanLimit, spanCondition);
if(spanCondition==USET_SPAN_NOT_CONTAINED) {
spanCondition=USET_SPAN_SIMPLE;
} else {
int32_t yesLimit=
prevSpanLimit+
norm2.spanQuickCheckYes(
s.tempSubStringBetween(prevSpanLimit, spanLimit), errorCode);
if(U_FAILURE(errorCode) || yesLimit<spanLimit) {
return yesLimit;
}
spanCondition=USET_SPAN_NOT_CONTAINED;
}
prevSpanLimit=spanLimit;
}
return s.length();
}
UBool
FilteredNormalizer2::hasBoundaryBefore(UChar32 c) const {
return !set.contains(c) || norm2.hasBoundaryBefore(c);
}
UBool
FilteredNormalizer2::hasBoundaryAfter(UChar32 c) const {
return !set.contains(c) || norm2.hasBoundaryAfter(c);
}
UBool
FilteredNormalizer2::isInert(UChar32 c) const {
return !set.contains(c) || norm2.isInert(c);
}
U_NAMESPACE_END
// C API ------------------------------------------------------------------- ***
U_NAMESPACE_USE
U_CAPI UNormalizer2 * U_EXPORT2
unorm2_openFiltered(const UNormalizer2 *norm2, const USet *filterSet, UErrorCode *pErrorCode) {
if(U_FAILURE(*pErrorCode)) {
return NULL;
}
if(filterSet==NULL) {
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
return NULL;
}
Normalizer2 *fn2=new FilteredNormalizer2(*(Normalizer2 *)norm2,
*UnicodeSet::fromUSet(filterSet));
if(fn2==NULL) {
*pErrorCode=U_MEMORY_ALLOCATION_ERROR;
}
return (UNormalizer2 *)fn2;
}
#endif // !UCONFIG_NO_NORMALIZATION

View File

@ -0,0 +1,212 @@
/*
******************************************************************************
* Copyright (C) 1997-2014, International Business Machines
* Corporation and others. All Rights Reserved.
******************************************************************************
* Date Name Description
* 03/28/00 aliu Creation.
******************************************************************************
*/
#ifndef HASH_H
#define HASH_H
#include "unicode/unistr.h"
#include "unicode/uobject.h"
#include "cmemory.h"
#include "uhash.h"
U_NAMESPACE_BEGIN
/**
* Hashtable is a thin C++ wrapper around UHashtable, a general-purpose void*
* hashtable implemented in C. Hashtable is designed to be idiomatic and
* easy-to-use in C++.
*
* Hashtable is an INTERNAL CLASS.
*/
class U_COMMON_API Hashtable : public UMemory {
UHashtable* hash;
UHashtable hashObj;
inline void init(UHashFunction *keyHash, UKeyComparator *keyComp, UValueComparator *valueComp, UErrorCode& status);
public:
/**
* Construct a hashtable
* @param ignoreKeyCase If true, keys are case insensitive.
* @param status Error code
*/
Hashtable(UBool ignoreKeyCase, UErrorCode& status);
/**
* Construct a hashtable
* @param keyComp Comparator for comparing the keys
* @param valueComp Comparator for comparing the values
* @param status Error code
*/
Hashtable(UKeyComparator *keyComp, UValueComparator *valueComp, UErrorCode& status);
/**
* Construct a hashtable
* @param status Error code
*/
Hashtable(UErrorCode& status);
/**
* Construct a hashtable, _disregarding any error_. Use this constructor
* with caution.
*/
Hashtable();
/**
* Non-virtual destructor; make this virtual if Hashtable is subclassed
* in the future.
*/
~Hashtable();
UObjectDeleter *setValueDeleter(UObjectDeleter *fn);
int32_t count() const;
void* put(const UnicodeString& key, void* value, UErrorCode& status);
int32_t puti(const UnicodeString& key, int32_t value, UErrorCode& status);
void* get(const UnicodeString& key) const;
int32_t geti(const UnicodeString& key) const;
void* remove(const UnicodeString& key);
int32_t removei(const UnicodeString& key);
void removeAll(void);
const UHashElement* find(const UnicodeString& key) const;
/**
* @param pos - must be UHASH_FIRST on first call, and untouched afterwards.
* @see uhash_nextElement
*/
const UHashElement* nextElement(int32_t& pos) const;
UKeyComparator* setKeyComparator(UKeyComparator*keyComp);
UValueComparator* setValueComparator(UValueComparator* valueComp);
UBool equals(const Hashtable& that) const;
private:
Hashtable(const Hashtable &other); // forbid copying of this class
Hashtable &operator=(const Hashtable &other); // forbid copying of this class
};
/*********************************************************************
* Implementation
********************************************************************/
inline void Hashtable::init(UHashFunction *keyHash, UKeyComparator *keyComp,
UValueComparator *valueComp, UErrorCode& status) {
if (U_FAILURE(status)) {
return;
}
uhash_init(&hashObj, keyHash, keyComp, valueComp, &status);
if (U_SUCCESS(status)) {
hash = &hashObj;
uhash_setKeyDeleter(hash, uprv_deleteUObject);
}
}
inline Hashtable::Hashtable(UKeyComparator *keyComp, UValueComparator *valueComp,
UErrorCode& status) : hash(0) {
init( uhash_hashUnicodeString, keyComp, valueComp, status);
}
inline Hashtable::Hashtable(UBool ignoreKeyCase, UErrorCode& status)
: hash(0)
{
init(ignoreKeyCase ? uhash_hashCaselessUnicodeString
: uhash_hashUnicodeString,
ignoreKeyCase ? uhash_compareCaselessUnicodeString
: uhash_compareUnicodeString,
NULL,
status);
}
inline Hashtable::Hashtable(UErrorCode& status)
: hash(0)
{
init(uhash_hashUnicodeString, uhash_compareUnicodeString, NULL, status);
}
inline Hashtable::Hashtable()
: hash(0)
{
UErrorCode status = U_ZERO_ERROR;
init(uhash_hashUnicodeString, uhash_compareUnicodeString, NULL, status);
}
inline Hashtable::~Hashtable() {
if (hash != NULL) {
uhash_close(hash);
}
}
inline UObjectDeleter *Hashtable::setValueDeleter(UObjectDeleter *fn) {
return uhash_setValueDeleter(hash, fn);
}
inline int32_t Hashtable::count() const {
return uhash_count(hash);
}
inline void* Hashtable::put(const UnicodeString& key, void* value, UErrorCode& status) {
return uhash_put(hash, new UnicodeString(key), value, &status);
}
inline int32_t Hashtable::puti(const UnicodeString& key, int32_t value, UErrorCode& status) {
return uhash_puti(hash, new UnicodeString(key), value, &status);
}
inline void* Hashtable::get(const UnicodeString& key) const {
return uhash_get(hash, &key);
}
inline int32_t Hashtable::geti(const UnicodeString& key) const {
return uhash_geti(hash, &key);
}
inline void* Hashtable::remove(const UnicodeString& key) {
return uhash_remove(hash, &key);
}
inline int32_t Hashtable::removei(const UnicodeString& key) {
return uhash_removei(hash, &key);
}
inline const UHashElement* Hashtable::find(const UnicodeString& key) const {
return uhash_find(hash, &key);
}
inline const UHashElement* Hashtable::nextElement(int32_t& pos) const {
return uhash_nextElement(hash, &pos);
}
inline void Hashtable::removeAll(void) {
uhash_removeAll(hash);
}
inline UKeyComparator* Hashtable::setKeyComparator(UKeyComparator*keyComp){
return uhash_setKeyComparator(hash, keyComp);
}
inline UValueComparator* Hashtable::setValueComparator(UValueComparator* valueComp){
return uhash_setValueComparator(hash, valueComp);
}
inline UBool Hashtable::equals(const Hashtable& that)const{
return uhash_equals(hash, that.hash);
}
U_NAMESPACE_END
#endif

View File

@ -0,0 +1,29 @@
/*
******************************************************************************
*
* Copyright (C) 2009-2011, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
*/
#include "unicode/utypes.h"
#include "unicode/icudataver.h"
#include "unicode/ures.h"
#include "uresimp.h" /* for ures_getVersionByKey */
U_CAPI void U_EXPORT2 u_getDataVersion(UVersionInfo dataVersionFillin, UErrorCode *status) {
UResourceBundle *icudatares = NULL;
if (U_FAILURE(*status)) {
return;
}
if (dataVersionFillin != NULL) {
icudatares = ures_openDirect(NULL, U_ICU_VERSION_BUNDLE , status);
if (U_SUCCESS(*status)) {
ures_getVersionByKey(icudatares, U_ICU_DATA_KEY, dataVersionFillin, status);
}
ures_close(icudatares);
}
}

View File

@ -0,0 +1,882 @@
/*
******************************************************************************
*
* Copyright (C) 2009-2015, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
*
* FILE NAME : icuplug.c
*
* Date Name Description
* 10/29/2009 sl New.
******************************************************************************
*/
#include "unicode/icuplug.h"
#if UCONFIG_ENABLE_PLUGINS
#include "icuplugimp.h"
#include "cstring.h"
#include "cmemory.h"
#include "putilimp.h"
#include "ucln.h"
#include <stdio.h>
#ifdef __MVS__ /* defined by z/OS compiler */
#define _POSIX_SOURCE
#include <cics.h> /* 12 Nov 2011 JAM iscics() function */
#endif
#include "charstr.h"
using namespace icu;
#ifndef UPLUG_TRACE
#define UPLUG_TRACE 0
#endif
#if UPLUG_TRACE
#include <stdio.h>
#define DBG(x) fprintf(stderr, "%s:%d: ",__FILE__,__LINE__); fprintf x
#endif
/**
* Internal structure of an ICU plugin.
*/
struct UPlugData {
UPlugEntrypoint *entrypoint; /**< plugin entrypoint */
uint32_t structSize; /**< initialized to the size of this structure */
uint32_t token; /**< must be U_PLUG_TOKEN */
void *lib; /**< plugin library, or NULL */
char libName[UPLUG_NAME_MAX]; /**< library name */
char sym[UPLUG_NAME_MAX]; /**< plugin symbol, or NULL */
char config[UPLUG_NAME_MAX]; /**< configuration data */
void *context; /**< user context data */
char name[UPLUG_NAME_MAX]; /**< name of plugin */
UPlugLevel level; /**< level of plugin */
UBool awaitingLoad; /**< TRUE if the plugin is awaiting a load call */
UBool dontUnload; /**< TRUE if plugin must stay resident (leak plugin and lib) */
UErrorCode pluginStatus; /**< status code of plugin */
};
#define UPLUG_LIBRARY_INITIAL_COUNT 8
#define UPLUG_PLUGIN_INITIAL_COUNT 12
/**
* Remove an item
* @param list the full list
* @param listSize the number of entries in the list
* @param memberSize the size of one member
* @param itemToRemove the item number of the member
* @return the new listsize
*/
static int32_t uplug_removeEntryAt(void *list, int32_t listSize, int32_t memberSize, int32_t itemToRemove) {
uint8_t *bytePtr = (uint8_t *)list;
/* get rid of some bad cases first */
if(listSize<1) {
return listSize;
}
/* is there anything to move? */
if(listSize > itemToRemove+1) {
memmove(bytePtr+(itemToRemove*memberSize), bytePtr+((itemToRemove+1)*memberSize), memberSize);
}
return listSize-1;
}
#if U_ENABLE_DYLOAD
/**
* Library management. Internal.
* @internal
*/
struct UPlugLibrary;
/**
* Library management. Internal.
* @internal
*/
typedef struct UPlugLibrary {
void *lib; /**< library ptr */
char name[UPLUG_NAME_MAX]; /**< library name */
uint32_t ref; /**< reference count */
} UPlugLibrary;
static UPlugLibrary staticLibraryList[UPLUG_LIBRARY_INITIAL_COUNT];
static UPlugLibrary * libraryList = staticLibraryList;
static int32_t libraryCount = 0;
static int32_t libraryMax = UPLUG_LIBRARY_INITIAL_COUNT;
/**
* Search for a library. Doesn't lock
* @param libName libname to search for
* @return the library's struct
*/
static int32_t searchForLibraryName(const char *libName) {
int32_t i;
for(i=0;i<libraryCount;i++) {
if(!uprv_strcmp(libName, libraryList[i].name)) {
return i;
}
}
return -1;
}
static int32_t searchForLibrary(void *lib) {
int32_t i;
for(i=0;i<libraryCount;i++) {
if(lib==libraryList[i].lib) {
return i;
}
}
return -1;
}
U_INTERNAL char * U_EXPORT2
uplug_findLibrary(void *lib, UErrorCode *status) {
int32_t libEnt;
char *ret = NULL;
if(U_FAILURE(*status)) {
return NULL;
}
libEnt = searchForLibrary(lib);
if(libEnt!=-1) {
ret = libraryList[libEnt].name;
} else {
*status = U_MISSING_RESOURCE_ERROR;
}
return ret;
}
U_INTERNAL void * U_EXPORT2
uplug_openLibrary(const char *libName, UErrorCode *status) {
int32_t libEntry = -1;
void *lib = NULL;
if(U_FAILURE(*status)) return NULL;
libEntry = searchForLibraryName(libName);
if(libEntry == -1) {
libEntry = libraryCount++;
if(libraryCount >= libraryMax) {
/* Ran out of library slots. Statically allocated because we can't depend on allocating memory.. */
*status = U_MEMORY_ALLOCATION_ERROR;
#if UPLUG_TRACE
DBG((stderr, "uplug_openLibrary() - out of library slots (max %d)\n", libraryMax));
#endif
return NULL;
}
/* Some operating systems don't want
DL operations from multiple threads. */
libraryList[libEntry].lib = uprv_dl_open(libName, status);
#if UPLUG_TRACE
DBG((stderr, "uplug_openLibrary(%s,%s) libEntry %d, lib %p\n", libName, u_errorName(*status), libEntry, lib));
#endif
if(libraryList[libEntry].lib == NULL || U_FAILURE(*status)) {
/* cleanup. */
libraryList[libEntry].lib = NULL; /* failure with open */
libraryList[libEntry].name[0] = 0;
#if UPLUG_TRACE
DBG((stderr, "uplug_openLibrary(%s,%s) libEntry %d, lib %p\n", libName, u_errorName(*status), libEntry, lib));
#endif
/* no need to free - just won't increase the count. */
libraryCount--;
} else { /* is it still there? */
/* link it in */
uprv_strncpy(libraryList[libEntry].name,libName,UPLUG_NAME_MAX);
libraryList[libEntry].ref=1;
lib = libraryList[libEntry].lib;
}
} else {
lib = libraryList[libEntry].lib;
libraryList[libEntry].ref++;
}
return lib;
}
U_INTERNAL void U_EXPORT2
uplug_closeLibrary(void *lib, UErrorCode *status) {
int32_t i;
#if UPLUG_TRACE
DBG((stderr, "uplug_closeLibrary(%p,%s) list %p\n", lib, u_errorName(*status), (void*)libraryList));
#endif
if(U_FAILURE(*status)) return;
for(i=0;i<libraryCount;i++) {
if(lib==libraryList[i].lib) {
if(--(libraryList[i].ref) == 0) {
uprv_dl_close(libraryList[i].lib, status);
libraryCount = uplug_removeEntryAt(libraryList, libraryCount, sizeof(*libraryList), i);
}
return;
}
}
*status = U_INTERNAL_PROGRAM_ERROR; /* could not find the entry! */
}
#endif
static UPlugData pluginList[UPLUG_PLUGIN_INITIAL_COUNT];
static int32_t pluginCount = 0;
static int32_t uplug_pluginNumber(UPlugData* d) {
UPlugData *pastPlug = &pluginList[pluginCount];
if(d<=pluginList) {
return 0;
} else if(d>=pastPlug) {
return pluginCount;
} else {
return (d-pluginList)/sizeof(pluginList[0]);
}
}
U_CAPI UPlugData * U_EXPORT2
uplug_nextPlug(UPlugData *prior) {
if(prior==NULL) {
return pluginList;
} else {
UPlugData *nextPlug = &prior[1];
UPlugData *pastPlug = &pluginList[pluginCount];
if(nextPlug>=pastPlug) {
return NULL;
} else {
return nextPlug;
}
}
}
/**
* Call the plugin with some params
*/
static void uplug_callPlug(UPlugData *plug, UPlugReason reason, UErrorCode *status) {
UPlugTokenReturn token;
if(plug==NULL||U_FAILURE(*status)) {
return;
}
token = (*(plug->entrypoint))(plug, reason, status);
if(token!=UPLUG_TOKEN) {
*status = U_INTERNAL_PROGRAM_ERROR;
}
}
static void uplug_unloadPlug(UPlugData *plug, UErrorCode *status) {
if(plug->awaitingLoad) { /* shouldn't happen. Plugin hasn'tbeen loaded yet.*/
*status = U_INTERNAL_PROGRAM_ERROR;
return;
}
if(U_SUCCESS(plug->pluginStatus)) {
/* Don't unload a plug which has a failing load status - means it didn't actually load. */
uplug_callPlug(plug, UPLUG_REASON_UNLOAD, status);
}
}
static void uplug_queryPlug(UPlugData *plug, UErrorCode *status) {
if(!plug->awaitingLoad || !(plug->level == UPLUG_LEVEL_UNKNOWN) ) { /* shouldn't happen. Plugin hasn'tbeen loaded yet.*/
*status = U_INTERNAL_PROGRAM_ERROR;
return;
}
plug->level = UPLUG_LEVEL_INVALID;
uplug_callPlug(plug, UPLUG_REASON_QUERY, status);
if(U_SUCCESS(*status)) {
if(plug->level == UPLUG_LEVEL_INVALID) {
plug->pluginStatus = U_PLUGIN_DIDNT_SET_LEVEL;
plug->awaitingLoad = FALSE;
}
} else {
plug->pluginStatus = U_INTERNAL_PROGRAM_ERROR;
plug->awaitingLoad = FALSE;
}
}
static void uplug_loadPlug(UPlugData *plug, UErrorCode *status) {
if(U_FAILURE(*status)) {
return;
}
if(!plug->awaitingLoad || (plug->level < UPLUG_LEVEL_LOW) ) { /* shouldn't happen. Plugin hasn'tbeen loaded yet.*/
*status = U_INTERNAL_PROGRAM_ERROR;
return;
}
uplug_callPlug(plug, UPLUG_REASON_LOAD, status);
plug->awaitingLoad = FALSE;
if(!U_SUCCESS(*status)) {
plug->pluginStatus = U_INTERNAL_PROGRAM_ERROR;
}
}
static UPlugData *uplug_allocateEmptyPlug(UErrorCode *status)
{
UPlugData *plug = NULL;
if(U_FAILURE(*status)) {
return NULL;
}
if(pluginCount == UPLUG_PLUGIN_INITIAL_COUNT) {
*status = U_MEMORY_ALLOCATION_ERROR;
return NULL;
}
plug = &pluginList[pluginCount++];
plug->token = UPLUG_TOKEN;
plug->structSize = sizeof(UPlugData);
plug->name[0]=0;
plug->level = UPLUG_LEVEL_UNKNOWN; /* initialize to null state */
plug->awaitingLoad = TRUE;
plug->dontUnload = FALSE;
plug->pluginStatus = U_ZERO_ERROR;
plug->libName[0] = 0;
plug->config[0]=0;
plug->sym[0]=0;
plug->lib=NULL;
plug->entrypoint=NULL;
return plug;
}
static UPlugData *uplug_allocatePlug(UPlugEntrypoint *entrypoint, const char *config, void *lib, const char *symName,
UErrorCode *status) {
UPlugData *plug = uplug_allocateEmptyPlug(status);
if(U_FAILURE(*status)) {
return NULL;
}
if(config!=NULL) {
uprv_strncpy(plug->config, config, UPLUG_NAME_MAX);
} else {
plug->config[0] = 0;
}
if(symName!=NULL) {
uprv_strncpy(plug->sym, symName, UPLUG_NAME_MAX);
} else {
plug->sym[0] = 0;
}
plug->entrypoint = entrypoint;
plug->lib = lib;
uplug_queryPlug(plug, status);
return plug;
}
static void uplug_deallocatePlug(UPlugData *plug, UErrorCode *status) {
UErrorCode subStatus = U_ZERO_ERROR;
if(!plug->dontUnload) {
#if U_ENABLE_DYLOAD
uplug_closeLibrary(plug->lib, &subStatus);
#endif
}
plug->lib = NULL;
if(U_SUCCESS(*status) && U_FAILURE(subStatus)) {
*status = subStatus;
}
/* shift plugins up and decrement count. */
if(U_SUCCESS(*status)) {
/* all ok- remove. */
pluginCount = uplug_removeEntryAt(pluginList, pluginCount, sizeof(plug[0]), uplug_pluginNumber(plug));
} else {
/* not ok- leave as a message. */
plug->awaitingLoad=FALSE;
plug->entrypoint=0;
plug->dontUnload=TRUE;
}
}
static void uplug_doUnloadPlug(UPlugData *plugToRemove, UErrorCode *status) {
if(plugToRemove != NULL) {
uplug_unloadPlug(plugToRemove, status);
uplug_deallocatePlug(plugToRemove, status);
}
}
U_CAPI void U_EXPORT2
uplug_removePlug(UPlugData *plug, UErrorCode *status) {
UPlugData *cursor = NULL;
UPlugData *plugToRemove = NULL;
if(U_FAILURE(*status)) return;
for(cursor=pluginList;cursor!=NULL;) {
if(cursor==plug) {
plugToRemove = plug;
cursor=NULL;
} else {
cursor = uplug_nextPlug(cursor);
}
}
uplug_doUnloadPlug(plugToRemove, status);
}
U_CAPI void U_EXPORT2
uplug_setPlugNoUnload(UPlugData *data, UBool dontUnload)
{
data->dontUnload = dontUnload;
}
U_CAPI void U_EXPORT2
uplug_setPlugLevel(UPlugData *data, UPlugLevel level) {
data->level = level;
}
U_CAPI UPlugLevel U_EXPORT2
uplug_getPlugLevel(UPlugData *data) {
return data->level;
}
U_CAPI void U_EXPORT2
uplug_setPlugName(UPlugData *data, const char *name) {
uprv_strncpy(data->name, name, UPLUG_NAME_MAX);
}
U_CAPI const char * U_EXPORT2
uplug_getPlugName(UPlugData *data) {
return data->name;
}
U_CAPI const char * U_EXPORT2
uplug_getSymbolName(UPlugData *data) {
return data->sym;
}
U_CAPI const char * U_EXPORT2
uplug_getLibraryName(UPlugData *data, UErrorCode *status) {
if(data->libName[0]) {
return data->libName;
} else {
#if U_ENABLE_DYLOAD
return uplug_findLibrary(data->lib, status);
#else
return NULL;
#endif
}
}
U_CAPI void * U_EXPORT2
uplug_getLibrary(UPlugData *data) {
return data->lib;
}
U_CAPI void * U_EXPORT2
uplug_getContext(UPlugData *data) {
return data->context;
}
U_CAPI void U_EXPORT2
uplug_setContext(UPlugData *data, void *context) {
data->context = context;
}
U_CAPI const char* U_EXPORT2
uplug_getConfiguration(UPlugData *data) {
return data->config;
}
U_INTERNAL UPlugData* U_EXPORT2
uplug_getPlugInternal(int32_t n) {
if(n <0 || n >= pluginCount) {
return NULL;
} else {
return &(pluginList[n]);
}
}
U_CAPI UErrorCode U_EXPORT2
uplug_getPlugLoadStatus(UPlugData *plug) {
return plug->pluginStatus;
}
/**
* Initialize a plugin fron an entrypoint and library - but don't load it.
*/
static UPlugData* uplug_initPlugFromEntrypointAndLibrary(UPlugEntrypoint *entrypoint, const char *config, void *lib, const char *sym,
UErrorCode *status) {
UPlugData *plug = NULL;
plug = uplug_allocatePlug(entrypoint, config, lib, sym, status);
if(U_SUCCESS(*status)) {
return plug;
} else {
uplug_deallocatePlug(plug, status);
return NULL;
}
}
U_CAPI UPlugData* U_EXPORT2
uplug_loadPlugFromEntrypoint(UPlugEntrypoint *entrypoint, const char *config, UErrorCode *status) {
UPlugData* plug = uplug_initPlugFromEntrypointAndLibrary(entrypoint, config, NULL, NULL, status);
uplug_loadPlug(plug, status);
return plug;
}
#if U_ENABLE_DYLOAD
static UPlugData*
uplug_initErrorPlug(const char *libName, const char *sym, const char *config, const char *nameOrError, UErrorCode loadStatus, UErrorCode *status)
{
UPlugData *plug = uplug_allocateEmptyPlug(status);
if(U_FAILURE(*status)) return NULL;
plug->pluginStatus = loadStatus;
plug->awaitingLoad = FALSE; /* Won't load. */
plug->dontUnload = TRUE; /* cannot unload. */
if(sym!=NULL) {
uprv_strncpy(plug->sym, sym, UPLUG_NAME_MAX);
}
if(libName!=NULL) {
uprv_strncpy(plug->libName, libName, UPLUG_NAME_MAX);
}
if(nameOrError!=NULL) {
uprv_strncpy(plug->name, nameOrError, UPLUG_NAME_MAX);
}
if(config!=NULL) {
uprv_strncpy(plug->config, config, UPLUG_NAME_MAX);
}
return plug;
}
/**
* Fetch a plugin from DLL, and then initialize it from a library- but don't load it.
*/
static UPlugData*
uplug_initPlugFromLibrary(const char *libName, const char *sym, const char *config, UErrorCode *status) {
void *lib = NULL;
UPlugData *plug = NULL;
if(U_FAILURE(*status)) { return NULL; }
lib = uplug_openLibrary(libName, status);
if(lib!=NULL && U_SUCCESS(*status)) {
UPlugEntrypoint *entrypoint = NULL;
entrypoint = (UPlugEntrypoint*)uprv_dlsym_func(lib, sym, status);
if(entrypoint!=NULL&&U_SUCCESS(*status)) {
plug = uplug_initPlugFromEntrypointAndLibrary(entrypoint, config, lib, sym, status);
if(plug!=NULL&&U_SUCCESS(*status)) {
plug->lib = lib; /* plug takes ownership of library */
lib = NULL; /* library is now owned by plugin. */
}
} else {
UErrorCode subStatus = U_ZERO_ERROR;
plug = uplug_initErrorPlug(libName,sym,config,"ERROR: Could not load entrypoint",(lib==NULL)?U_MISSING_RESOURCE_ERROR:*status,&subStatus);
}
if(lib!=NULL) { /* still need to close the lib */
UErrorCode subStatus = U_ZERO_ERROR;
uplug_closeLibrary(lib, &subStatus); /* don't care here */
}
} else {
UErrorCode subStatus = U_ZERO_ERROR;
plug = uplug_initErrorPlug(libName,sym,config,"ERROR: could not load library",(lib==NULL)?U_MISSING_RESOURCE_ERROR:*status,&subStatus);
}
return plug;
}
U_CAPI UPlugData* U_EXPORT2
uplug_loadPlugFromLibrary(const char *libName, const char *sym, const char *config, UErrorCode *status) {
UPlugData *plug = NULL;
if(U_FAILURE(*status)) { return NULL; }
plug = uplug_initPlugFromLibrary(libName, sym, config, status);
uplug_loadPlug(plug, status);
return plug;
}
#endif
static UPlugLevel gCurrentLevel = UPLUG_LEVEL_LOW;
U_CAPI UPlugLevel U_EXPORT2 uplug_getCurrentLevel() {
return gCurrentLevel;
}
static UBool U_CALLCONV uplug_cleanup(void)
{
int32_t i;
UPlugData *pluginToRemove;
/* cleanup plugs */
for(i=0;i<pluginCount;i++) {
UErrorCode subStatus = U_ZERO_ERROR;
pluginToRemove = &pluginList[i];
/* unload and deallocate */
uplug_doUnloadPlug(pluginToRemove, &subStatus);
}
/* close other held libs? */
gCurrentLevel = UPLUG_LEVEL_LOW;
return TRUE;
}
#if U_ENABLE_DYLOAD
static void uplug_loadWaitingPlugs(UErrorCode *status) {
int32_t i;
UPlugLevel currentLevel = uplug_getCurrentLevel();
if(U_FAILURE(*status)) {
return;
}
#if UPLUG_TRACE
DBG((stderr, "uplug_loadWaitingPlugs() Level: %d\n", currentLevel));
#endif
/* pass #1: low level plugs */
for(i=0;i<pluginCount;i++) {
UErrorCode subStatus = U_ZERO_ERROR;
UPlugData *pluginToLoad = &pluginList[i];
if(pluginToLoad->awaitingLoad) {
if(pluginToLoad->level == UPLUG_LEVEL_LOW) {
if(currentLevel > UPLUG_LEVEL_LOW) {
pluginToLoad->pluginStatus = U_PLUGIN_TOO_HIGH;
} else {
UPlugLevel newLevel;
uplug_loadPlug(pluginToLoad, &subStatus);
newLevel = uplug_getCurrentLevel();
if(newLevel > currentLevel) {
pluginToLoad->pluginStatus = U_PLUGIN_CHANGED_LEVEL_WARNING;
currentLevel = newLevel;
}
}
pluginToLoad->awaitingLoad = FALSE;
}
}
}
for(i=0;i<pluginCount;i++) {
UErrorCode subStatus = U_ZERO_ERROR;
UPlugData *pluginToLoad = &pluginList[i];
if(pluginToLoad->awaitingLoad) {
if(pluginToLoad->level == UPLUG_LEVEL_INVALID) {
pluginToLoad->pluginStatus = U_PLUGIN_DIDNT_SET_LEVEL;
} else if(pluginToLoad->level == UPLUG_LEVEL_UNKNOWN) {
pluginToLoad->pluginStatus = U_INTERNAL_PROGRAM_ERROR;
} else {
uplug_loadPlug(pluginToLoad, &subStatus);
}
pluginToLoad->awaitingLoad = FALSE;
}
}
#if UPLUG_TRACE
DBG((stderr, " Done Loading Plugs. Level: %d\n", (int32_t)uplug_getCurrentLevel()));
#endif
}
/* Name of the plugin config file */
static char plugin_file[2048] = "";
#endif
U_INTERNAL const char* U_EXPORT2
uplug_getPluginFile() {
#if U_ENABLE_DYLOAD && !UCONFIG_NO_FILE_IO
return plugin_file;
#else
return NULL;
#endif
}
// uplug_init() is called first thing from u_init().
U_CAPI void U_EXPORT2
uplug_init(UErrorCode *status) {
#if !U_ENABLE_DYLOAD
(void)status; /* unused */
#elif !UCONFIG_NO_FILE_IO
CharString plugin_dir;
const char *env = getenv("ICU_PLUGINS");
if(U_FAILURE(*status)) return;
if(env != NULL) {
plugin_dir.append(env, -1, *status);
}
if(U_FAILURE(*status)) return;
#if defined(DEFAULT_ICU_PLUGINS)
if(plugin_dir.isEmpty()) {
plugin_dir.append(DEFAULT_ICU_PLUGINS, -1, *status);
}
#endif
#if UPLUG_TRACE
DBG((stderr, "ICU_PLUGINS=%s\n", plugin_dir.data()));
#endif
if(!plugin_dir.isEmpty()) {
FILE *f;
CharString pluginFile;
#ifdef OS390BATCH
/* There are potentially a lot of ways to implement a plugin directory on OS390/zOS */
/* Keeping in mind that unauthorized file access is logged, monitored, and enforced */
/* I've chosen to open a DDNAME if BATCH and leave it alone for (presumably) UNIX */
/* System Services. Alternative techniques might be allocating a member in */
/* SYS1.PARMLIB or setting an environment variable "ICU_PLUGIN_PATH" (?). The */
/* DDNAME can be connected to a file in the HFS if need be. */
pluginFile.append("//DD:ICUPLUG", -1, *status); /* JAM 20 Oct 2011 */
#else
pluginFile.append(plugin_dir, *status);
pluginFile.append(U_FILE_SEP_STRING, -1, *status);
pluginFile.append("icuplugins", -1, *status);
pluginFile.append(U_ICU_VERSION_SHORT, -1, *status);
pluginFile.append(".txt", -1, *status);
#endif
#if UPLUG_TRACE
DBG((stderr, "status=%s\n", u_errorName(*status)));
#endif
if(U_FAILURE(*status)) {
return;
}
if((size_t)pluginFile.length() > (sizeof(plugin_file)-1)) {
*status = U_BUFFER_OVERFLOW_ERROR;
#if UPLUG_TRACE
DBG((stderr, "status=%s\n", u_errorName(*status)));
#endif
return;
}
/* plugin_file is not used for processing - it is only used
so that uplug_getPluginFile() works (i.e. icuinfo)
*/
uprv_strncpy(plugin_file, pluginFile.data(), sizeof(plugin_file));
#if UPLUG_TRACE
DBG((stderr, "pluginfile= %s len %d/%d\n", plugin_file, (int)strlen(plugin_file), (int)sizeof(plugin_file)));
#endif
#ifdef __MVS__
if (iscics()) /* 12 Nov 2011 JAM */
{
f = NULL;
}
else
#endif
{
f = fopen(pluginFile.data(), "r");
}
if(f != NULL) {
char linebuf[1024];
char *p, *libName=NULL, *symName=NULL, *config=NULL;
int32_t line = 0;
while(fgets(linebuf,1023,f)) {
line++;
if(!*linebuf || *linebuf=='#') {
continue;
} else {
p = linebuf;
while(*p&&isspace((int)*p))
p++;
if(!*p || *p=='#') continue;
libName = p;
while(*p&&!isspace((int)*p)) {
p++;
}
if(!*p || *p=='#') continue; /* no tab after libname */
*p=0; /* end of libname */
p++;
while(*p&&isspace((int)*p)) {
p++;
}
if(!*p||*p=='#') continue; /* no symname after libname +tab */
symName = p;
while(*p&&!isspace((int)*p)) {
p++;
}
if(*p) { /* has config */
*p=0;
++p;
while(*p&&isspace((int)*p)) {
p++;
}
if(*p) {
config = p;
}
}
/* chop whitespace at the end of the config */
if(config!=NULL&&*config!=0) {
p = config+strlen(config);
while(p>config&&isspace((int)*(--p))) {
*p=0;
}
}
/* OK, we're good. */
{
UErrorCode subStatus = U_ZERO_ERROR;
UPlugData *plug = uplug_initPlugFromLibrary(libName, symName, config, &subStatus);
if(U_FAILURE(subStatus) && U_SUCCESS(*status)) {
*status = subStatus;
}
#if UPLUG_TRACE
DBG((stderr, "PLUGIN libName=[%s], sym=[%s], config=[%s]\n", libName, symName, config));
DBG((stderr, " -> %p, %s\n", (void*)plug, u_errorName(subStatus)));
#else
(void)plug; /* unused */
#endif
}
}
}
fclose(f);
} else {
#if UPLUG_TRACE
DBG((stderr, "Can't open plugin file %s\n", plugin_file));
#endif
}
}
uplug_loadWaitingPlugs(status);
#endif /* U_ENABLE_DYLOAD */
gCurrentLevel = UPLUG_LEVEL_HIGH;
ucln_registerCleanup(UCLN_UPLUG, uplug_cleanup);
}
#endif

View File

@ -0,0 +1,91 @@
/*
******************************************************************************
*
* Copyright (C) 2009-2015, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
*
* FILE NAME : icuplugimp.h
*
* Internal functions for the ICU plugin system
*
* Date Name Description
* 10/29/2009 sl New.
******************************************************************************
*/
#ifndef ICUPLUGIMP_H
#define ICUPLUGIMP_H
#include "unicode/icuplug.h"
#if UCONFIG_ENABLE_PLUGINS
/*========================*/
/** @{ Library Manipulation
*/
/**
* Open a library, adding a reference count if needed.
* @param libName library name to load
* @param status error code
* @return the library pointer, or NULL
* @internal internal use only
*/
U_INTERNAL void * U_EXPORT2
uplug_openLibrary(const char *libName, UErrorCode *status);
/**
* Close a library, if its reference count is 0
* @param lib the library to close
* @param status error code
* @internal internal use only
*/
U_INTERNAL void U_EXPORT2
uplug_closeLibrary(void *lib, UErrorCode *status);
/**
* Get a library's name, or NULL if not found.
* @param lib the library's name
* @param status error code
* @return the library name, or NULL if not found.
* @internal internal use only
*/
U_INTERNAL char * U_EXPORT2
uplug_findLibrary(void *lib, UErrorCode *status);
/** @} */
/*========================*/
/** {@ ICU Plugin internal interfaces
*/
/**
* Initialize the plugins
* @param status error result
* @internal - Internal use only.
*/
U_INTERNAL void U_EXPORT2
uplug_init(UErrorCode *status);
/**
* Get raw plug N
* @internal - Internal use only
*/
U_INTERNAL UPlugData* U_EXPORT2
uplug_getPlugInternal(int32_t n);
/**
* Get the name of the plugin file.
* @internal - Internal use only.
*/
U_INTERNAL const char* U_EXPORT2
uplug_getPluginFile(void);
/** @} */
#endif
#endif

View File

@ -0,0 +1,364 @@
/*
*******************************************************************************
*
* Copyright (C) 2013-2014, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: listformatter.cpp
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2012aug27
* created by: Umesh P. Nair
*/
#include "unicode/listformatter.h"
#include "simplepatternformatter.h"
#include "mutex.h"
#include "hash.h"
#include "cstring.h"
#include "ulocimp.h"
#include "charstr.h"
#include "ucln_cmn.h"
#include "uresimp.h"
U_NAMESPACE_BEGIN
struct ListFormatInternal : public UMemory {
SimplePatternFormatter twoPattern;
SimplePatternFormatter startPattern;
SimplePatternFormatter middlePattern;
SimplePatternFormatter endPattern;
ListFormatInternal(
const UnicodeString& two,
const UnicodeString& start,
const UnicodeString& middle,
const UnicodeString& end) :
twoPattern(two),
startPattern(start),
middlePattern(middle),
endPattern(end) {}
ListFormatInternal(const ListFormatData &data) :
twoPattern(data.twoPattern),
startPattern(data.startPattern),
middlePattern(data.middlePattern),
endPattern(data.endPattern) { }
ListFormatInternal(const ListFormatInternal &other) :
twoPattern(other.twoPattern),
startPattern(other.startPattern),
middlePattern(other.middlePattern),
endPattern(other.endPattern) { }
};
static Hashtable* listPatternHash = NULL;
static UMutex listFormatterMutex = U_MUTEX_INITIALIZER;
static const char *STANDARD_STYLE = "standard";
U_CDECL_BEGIN
static UBool U_CALLCONV uprv_listformatter_cleanup() {
delete listPatternHash;
listPatternHash = NULL;
return TRUE;
}
static void U_CALLCONV
uprv_deleteListFormatInternal(void *obj) {
delete static_cast<ListFormatInternal *>(obj);
}
U_CDECL_END
static ListFormatInternal* loadListFormatInternal(
const Locale& locale,
const char* style,
UErrorCode& errorCode);
static void getStringByKey(
const UResourceBundle* rb,
const char* key,
UnicodeString& result,
UErrorCode& errorCode);
ListFormatter::ListFormatter(const ListFormatter& other) :
owned(other.owned), data(other.data) {
if (other.owned != NULL) {
owned = new ListFormatInternal(*other.owned);
data = owned;
}
}
ListFormatter& ListFormatter::operator=(const ListFormatter& other) {
if (this == &other) {
return *this;
}
delete owned;
if (other.owned) {
owned = new ListFormatInternal(*other.owned);
data = owned;
} else {
owned = NULL;
data = other.data;
}
return *this;
}
void ListFormatter::initializeHash(UErrorCode& errorCode) {
if (U_FAILURE(errorCode)) {
return;
}
listPatternHash = new Hashtable();
if (listPatternHash == NULL) {
errorCode = U_MEMORY_ALLOCATION_ERROR;
return;
}
listPatternHash->setValueDeleter(uprv_deleteListFormatInternal);
ucln_common_registerCleanup(UCLN_COMMON_LIST_FORMATTER, uprv_listformatter_cleanup);
}
const ListFormatInternal* ListFormatter::getListFormatInternal(
const Locale& locale, const char *style, UErrorCode& errorCode) {
if (U_FAILURE(errorCode)) {
return NULL;
}
CharString keyBuffer(locale.getName(), errorCode);
keyBuffer.append(':', errorCode).append(style, errorCode);
UnicodeString key(keyBuffer.data(), -1, US_INV);
ListFormatInternal* result = NULL;
{
Mutex m(&listFormatterMutex);
if (listPatternHash == NULL) {
initializeHash(errorCode);
if (U_FAILURE(errorCode)) {
return NULL;
}
}
result = static_cast<ListFormatInternal*>(listPatternHash->get(key));
}
if (result != NULL) {
return result;
}
result = loadListFormatInternal(locale, style, errorCode);
if (U_FAILURE(errorCode)) {
return NULL;
}
{
Mutex m(&listFormatterMutex);
ListFormatInternal* temp = static_cast<ListFormatInternal*>(listPatternHash->get(key));
if (temp != NULL) {
delete result;
result = temp;
} else {
listPatternHash->put(key, result, errorCode);
if (U_FAILURE(errorCode)) {
return NULL;
}
}
}
return result;
}
static ListFormatInternal* loadListFormatInternal(
const Locale& locale, const char * style, UErrorCode& errorCode) {
UResourceBundle* rb = ures_open(NULL, locale.getName(), &errorCode);
if (U_FAILURE(errorCode)) {
ures_close(rb);
return NULL;
}
rb = ures_getByKeyWithFallback(rb, "listPattern", rb, &errorCode);
rb = ures_getByKeyWithFallback(rb, style, rb, &errorCode);
if (U_FAILURE(errorCode)) {
ures_close(rb);
return NULL;
}
UnicodeString two, start, middle, end;
getStringByKey(rb, "2", two, errorCode);
getStringByKey(rb, "start", start, errorCode);
getStringByKey(rb, "middle", middle, errorCode);
getStringByKey(rb, "end", end, errorCode);
ures_close(rb);
if (U_FAILURE(errorCode)) {
return NULL;
}
ListFormatInternal* result = new ListFormatInternal(two, start, middle, end);
if (result == NULL) {
errorCode = U_MEMORY_ALLOCATION_ERROR;
return NULL;
}
return result;
}
static void getStringByKey(const UResourceBundle* rb, const char* key, UnicodeString& result, UErrorCode& errorCode) {
int32_t len;
const UChar* ustr = ures_getStringByKeyWithFallback(rb, key, &len, &errorCode);
if (U_FAILURE(errorCode)) {
return;
}
result.setTo(ustr, len);
}
ListFormatter* ListFormatter::createInstance(UErrorCode& errorCode) {
Locale locale; // The default locale.
return createInstance(locale, errorCode);
}
ListFormatter* ListFormatter::createInstance(const Locale& locale, UErrorCode& errorCode) {
return createInstance(locale, STANDARD_STYLE, errorCode);
}
ListFormatter* ListFormatter::createInstance(const Locale& locale, const char *style, UErrorCode& errorCode) {
Locale tempLocale = locale;
const ListFormatInternal* listFormatInternal = getListFormatInternal(tempLocale, style, errorCode);
if (U_FAILURE(errorCode)) {
return NULL;
}
ListFormatter* p = new ListFormatter(listFormatInternal);
if (p == NULL) {
errorCode = U_MEMORY_ALLOCATION_ERROR;
return NULL;
}
return p;
}
ListFormatter::ListFormatter(const ListFormatData& listFormatData) {
owned = new ListFormatInternal(listFormatData);
data = owned;
}
ListFormatter::ListFormatter(const ListFormatInternal* listFormatterInternal) : owned(NULL), data(listFormatterInternal) {
}
ListFormatter::~ListFormatter() {
delete owned;
}
/**
* Joins first and second using the pattern pat.
* On entry offset is an offset into first or -1 if offset unspecified.
* On exit offset is offset of second in result if recordOffset was set
* Otherwise if it was >=0 it is set to point into result where it used
* to point into first. On exit, result is the join of first and second
* according to pat. Any previous value of result gets replaced.
*/
static void joinStringsAndReplace(
const SimplePatternFormatter& pat,
const UnicodeString& first,
const UnicodeString& second,
UnicodeString &result,
UBool recordOffset,
int32_t &offset,
UErrorCode& errorCode) {
if (U_FAILURE(errorCode)) {
return;
}
const UnicodeString *params[2] = {&first, &second};
int32_t offsets[2];
pat.formatAndReplace(
params,
UPRV_LENGTHOF(params),
result,
offsets,
UPRV_LENGTHOF(offsets),
errorCode);
if (U_FAILURE(errorCode)) {
return;
}
if (offsets[0] == -1 || offsets[1] == -1) {
errorCode = U_INVALID_FORMAT_ERROR;
return;
}
if (recordOffset) {
offset = offsets[1];
} else if (offset >= 0) {
offset += offsets[0];
}
}
UnicodeString& ListFormatter::format(
const UnicodeString items[],
int32_t nItems,
UnicodeString& appendTo,
UErrorCode& errorCode) const {
int32_t offset;
return format(items, nItems, appendTo, -1, offset, errorCode);
}
UnicodeString& ListFormatter::format(
const UnicodeString items[],
int32_t nItems,
UnicodeString& appendTo,
int32_t index,
int32_t &offset,
UErrorCode& errorCode) const {
offset = -1;
if (U_FAILURE(errorCode)) {
return appendTo;
}
if (data == NULL) {
errorCode = U_INVALID_STATE_ERROR;
return appendTo;
}
if (nItems <= 0) {
return appendTo;
}
if (nItems == 1) {
if (index == 0) {
offset = appendTo.length();
}
appendTo.append(items[0]);
return appendTo;
}
UnicodeString result(items[0]);
if (index == 0) {
offset = 0;
}
joinStringsAndReplace(
nItems == 2 ? data->twoPattern : data->startPattern,
result,
items[1],
result,
index == 1,
offset,
errorCode);
if (nItems > 2) {
for (int32_t i = 2; i < nItems - 1; ++i) {
joinStringsAndReplace(
data->middlePattern,
result,
items[i],
result,
index == i,
offset,
errorCode);
}
joinStringsAndReplace(
data->endPattern,
result,
items[nItems - 1],
result,
index == nItems - 1,
offset,
errorCode);
}
if (U_SUCCESS(errorCode)) {
if (offset >= 0) {
offset += appendTo.length();
}
appendTo += result;
}
return appendTo;
}
U_NAMESPACE_END

View File

@ -0,0 +1,356 @@
/*
*******************************************************************************
* Copyright (C) 2014, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* loadednormalizer2impl.cpp
*
* created on: 2014sep03
* created by: Markus W. Scherer
*/
#include "unicode/utypes.h"
#if !UCONFIG_NO_NORMALIZATION
#include "unicode/udata.h"
#include "unicode/localpointer.h"
#include "unicode/normalizer2.h"
#include "unicode/unistr.h"
#include "unicode/unorm.h"
#include "cstring.h"
#include "mutex.h"
#include "norm2allmodes.h"
#include "normalizer2impl.h"
#include "uassert.h"
#include "ucln_cmn.h"
#include "uhash.h"
U_NAMESPACE_BEGIN
class LoadedNormalizer2Impl : public Normalizer2Impl {
public:
LoadedNormalizer2Impl() : memory(NULL), ownedTrie(NULL) {}
virtual ~LoadedNormalizer2Impl();
void load(const char *packageName, const char *name, UErrorCode &errorCode);
private:
static UBool U_CALLCONV
isAcceptable(void *context, const char *type, const char *name, const UDataInfo *pInfo);
UDataMemory *memory;
UTrie2 *ownedTrie;
};
LoadedNormalizer2Impl::~LoadedNormalizer2Impl() {
udata_close(memory);
utrie2_close(ownedTrie);
}
UBool U_CALLCONV
LoadedNormalizer2Impl::isAcceptable(void * /*context*/,
const char * /* type */, const char * /*name*/,
const UDataInfo *pInfo) {
if(
pInfo->size>=20 &&
pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
pInfo->charsetFamily==U_CHARSET_FAMILY &&
pInfo->dataFormat[0]==0x4e && /* dataFormat="Nrm2" */
pInfo->dataFormat[1]==0x72 &&
pInfo->dataFormat[2]==0x6d &&
pInfo->dataFormat[3]==0x32 &&
pInfo->formatVersion[0]==2
) {
// Normalizer2Impl *me=(Normalizer2Impl *)context;
// uprv_memcpy(me->dataVersion, pInfo->dataVersion, 4);
return TRUE;
} else {
return FALSE;
}
}
void
LoadedNormalizer2Impl::load(const char *packageName, const char *name, UErrorCode &errorCode) {
if(U_FAILURE(errorCode)) {
return;
}
memory=udata_openChoice(packageName, "nrm", name, isAcceptable, this, &errorCode);
if(U_FAILURE(errorCode)) {
return;
}
const uint8_t *inBytes=(const uint8_t *)udata_getMemory(memory);
const int32_t *inIndexes=(const int32_t *)inBytes;
int32_t indexesLength=inIndexes[IX_NORM_TRIE_OFFSET]/4;
if(indexesLength<=IX_MIN_MAYBE_YES) {
errorCode=U_INVALID_FORMAT_ERROR; // Not enough indexes.
return;
}
int32_t offset=inIndexes[IX_NORM_TRIE_OFFSET];
int32_t nextOffset=inIndexes[IX_EXTRA_DATA_OFFSET];
ownedTrie=utrie2_openFromSerialized(UTRIE2_16_VALUE_BITS,
inBytes+offset, nextOffset-offset, NULL,
&errorCode);
if(U_FAILURE(errorCode)) {
return;
}
offset=nextOffset;
nextOffset=inIndexes[IX_SMALL_FCD_OFFSET];
const uint16_t *inExtraData=(const uint16_t *)(inBytes+offset);
// smallFCD: new in formatVersion 2
offset=nextOffset;
const uint8_t *inSmallFCD=inBytes+offset;
init(inIndexes, ownedTrie, inExtraData, inSmallFCD);
}
// instance cache ---------------------------------------------------------- ***
Norm2AllModes *
Norm2AllModes::createInstance(const char *packageName,
const char *name,
UErrorCode &errorCode) {
if(U_FAILURE(errorCode)) {
return NULL;
}
LoadedNormalizer2Impl *impl=new LoadedNormalizer2Impl;
if(impl==NULL) {
errorCode=U_MEMORY_ALLOCATION_ERROR;
return NULL;
}
impl->load(packageName, name, errorCode);
return createInstance(impl, errorCode);
}
U_CDECL_BEGIN
static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup();
U_CDECL_END
static Norm2AllModes *nfkcSingleton;
static Norm2AllModes *nfkc_cfSingleton;
static UHashtable *cache=NULL;
static icu::UInitOnce nfkcInitOnce = U_INITONCE_INITIALIZER;
static icu::UInitOnce nfkc_cfInitOnce = U_INITONCE_INITIALIZER;
// UInitOnce singleton initialization function
static void U_CALLCONV initSingletons(const char *what, UErrorCode &errorCode) {
if (uprv_strcmp(what, "nfkc") == 0) {
nfkcSingleton = Norm2AllModes::createInstance(NULL, "nfkc", errorCode);
} else if (uprv_strcmp(what, "nfkc_cf") == 0) {
nfkc_cfSingleton = Norm2AllModes::createInstance(NULL, "nfkc_cf", errorCode);
} else {
U_ASSERT(FALSE); // Unknown singleton
}
ucln_common_registerCleanup(UCLN_COMMON_LOADED_NORMALIZER2, uprv_loaded_normalizer2_cleanup);
}
U_CDECL_BEGIN
static void U_CALLCONV deleteNorm2AllModes(void *allModes) {
delete (Norm2AllModes *)allModes;
}
static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup() {
delete nfkcSingleton;
nfkcSingleton = NULL;
delete nfkc_cfSingleton;
nfkc_cfSingleton = NULL;
uhash_close(cache);
cache=NULL;
nfkcInitOnce.reset();
nfkc_cfInitOnce.reset();
return TRUE;
}
U_CDECL_END
const Norm2AllModes *
Norm2AllModes::getNFKCInstance(UErrorCode &errorCode) {
if(U_FAILURE(errorCode)) { return NULL; }
umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode);
return nfkcSingleton;
}
const Norm2AllModes *
Norm2AllModes::getNFKC_CFInstance(UErrorCode &errorCode) {
if(U_FAILURE(errorCode)) { return NULL; }
umtx_initOnce(nfkc_cfInitOnce, &initSingletons, "nfkc_cf", errorCode);
return nfkc_cfSingleton;
}
const Normalizer2 *
Normalizer2::getNFKCInstance(UErrorCode &errorCode) {
const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode);
return allModes!=NULL ? &allModes->comp : NULL;
}
const Normalizer2 *
Normalizer2::getNFKDInstance(UErrorCode &errorCode) {
const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode);
return allModes!=NULL ? &allModes->decomp : NULL;
}
const Normalizer2 *
Normalizer2::getNFKCCasefoldInstance(UErrorCode &errorCode) {
const Norm2AllModes *allModes=Norm2AllModes::getNFKC_CFInstance(errorCode);
return allModes!=NULL ? &allModes->comp : NULL;
}
const Normalizer2 *
Normalizer2::getInstance(const char *packageName,
const char *name,
UNormalization2Mode mode,
UErrorCode &errorCode) {
if(U_FAILURE(errorCode)) {
return NULL;
}
if(name==NULL || *name==0) {
errorCode=U_ILLEGAL_ARGUMENT_ERROR;
return NULL;
}
const Norm2AllModes *allModes=NULL;
if(packageName==NULL) {
if(0==uprv_strcmp(name, "nfc")) {
allModes=Norm2AllModes::getNFCInstance(errorCode);
} else if(0==uprv_strcmp(name, "nfkc")) {
allModes=Norm2AllModes::getNFKCInstance(errorCode);
} else if(0==uprv_strcmp(name, "nfkc_cf")) {
allModes=Norm2AllModes::getNFKC_CFInstance(errorCode);
}
}
if(allModes==NULL && U_SUCCESS(errorCode)) {
{
Mutex lock;
if(cache!=NULL) {
allModes=(Norm2AllModes *)uhash_get(cache, name);
}
}
if(allModes==NULL) {
LocalPointer<Norm2AllModes> localAllModes(
Norm2AllModes::createInstance(packageName, name, errorCode));
if(U_SUCCESS(errorCode)) {
Mutex lock;
if(cache==NULL) {
cache=uhash_open(uhash_hashChars, uhash_compareChars, NULL, &errorCode);
if(U_FAILURE(errorCode)) {
return NULL;
}
uhash_setKeyDeleter(cache, uprv_free);
uhash_setValueDeleter(cache, deleteNorm2AllModes);
}
void *temp=uhash_get(cache, name);
if(temp==NULL) {
int32_t keyLength=uprv_strlen(name)+1;
char *nameCopy=(char *)uprv_malloc(keyLength);
if(nameCopy==NULL) {
errorCode=U_MEMORY_ALLOCATION_ERROR;
return NULL;
}
uprv_memcpy(nameCopy, name, keyLength);
allModes=localAllModes.getAlias();
uhash_put(cache, nameCopy, localAllModes.orphan(), &errorCode);
} else {
// race condition
allModes=(Norm2AllModes *)temp;
}
}
}
}
if(allModes!=NULL && U_SUCCESS(errorCode)) {
switch(mode) {
case UNORM2_COMPOSE:
return &allModes->comp;
case UNORM2_DECOMPOSE:
return &allModes->decomp;
case UNORM2_FCD:
return &allModes->fcd;
case UNORM2_COMPOSE_CONTIGUOUS:
return &allModes->fcc;
default:
break; // do nothing
}
}
return NULL;
}
const Normalizer2 *
Normalizer2Factory::getInstance(UNormalizationMode mode, UErrorCode &errorCode) {
if(U_FAILURE(errorCode)) {
return NULL;
}
switch(mode) {
case UNORM_NFD:
return Normalizer2::getNFDInstance(errorCode);
case UNORM_NFKD:
return Normalizer2::getNFKDInstance(errorCode);
case UNORM_NFC:
return Normalizer2::getNFCInstance(errorCode);
case UNORM_NFKC:
return Normalizer2::getNFKCInstance(errorCode);
case UNORM_FCD:
return getFCDInstance(errorCode);
default: // UNORM_NONE
return getNoopInstance(errorCode);
}
}
const Normalizer2Impl *
Normalizer2Factory::getNFKCImpl(UErrorCode &errorCode) {
const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode);
return allModes!=NULL ? allModes->impl : NULL;
}
const Normalizer2Impl *
Normalizer2Factory::getNFKC_CFImpl(UErrorCode &errorCode) {
const Norm2AllModes *allModes=Norm2AllModes::getNFKC_CFInstance(errorCode);
return allModes!=NULL ? allModes->impl : NULL;
}
U_NAMESPACE_END
// C API ------------------------------------------------------------------- ***
U_NAMESPACE_USE
U_CAPI const UNormalizer2 * U_EXPORT2
unorm2_getNFKCInstance(UErrorCode *pErrorCode) {
return (const UNormalizer2 *)Normalizer2::getNFKCInstance(*pErrorCode);
}
U_CAPI const UNormalizer2 * U_EXPORT2
unorm2_getNFKDInstance(UErrorCode *pErrorCode) {
return (const UNormalizer2 *)Normalizer2::getNFKDInstance(*pErrorCode);
}
U_CAPI const UNormalizer2 * U_EXPORT2
unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode) {
return (const UNormalizer2 *)Normalizer2::getNFKCCasefoldInstance(*pErrorCode);
}
U_CAPI const UNormalizer2 * U_EXPORT2
unorm2_getInstance(const char *packageName,
const char *name,
UNormalization2Mode mode,
UErrorCode *pErrorCode) {
return (const UNormalizer2 *)Normalizer2::getInstance(packageName, name, mode, *pErrorCode);
}
U_CFUNC UNormalizationCheckResult
unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) {
if(mode<=UNORM_NONE || UNORM_FCD<=mode) {
return UNORM_YES;
}
UErrorCode errorCode=U_ZERO_ERROR;
const Normalizer2 *norm2=Normalizer2Factory::getInstance(mode, errorCode);
if(U_SUCCESS(errorCode)) {
return ((const Normalizer2WithImpl *)norm2)->getQuickCheck(c);
} else {
return UNORM_MAYBE;
}
}
#endif // !UCONFIG_NO_NORMALIZATION

View File

@ -0,0 +1,25 @@
/*
***************************************************************************
* Copyright (C) 2006 International Business Machines Corporation *
* and others. All rights reserved. *
***************************************************************************
*/
#ifndef LOCALSVC_H
#define LOCALSVC_H
#include "unicode/utypes.h"
#if U_LOCAL_SERVICE_HOOK
/**
* Prototype for user-supplied service hook. This function is expected to return
* a type of factory object specific to the requested service.
*
* @param what service-specific string identifying the specific user hook
* @param status error status
* @return a service-specific hook, or NULL on failure.
*/
U_CAPI void* uprv_svc_hook(const char *what, UErrorCode *status);
#endif
#endif

View File

@ -0,0 +1,179 @@
/*
*******************************************************************************
*
* Copyright (C) 1997-2013, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: locavailable.cpp
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2010feb25
* created by: Markus W. Scherer
*
* Code for available locales, separated out from other .cpp files
* that then do not depend on resource bundle code and res_index bundles.
*/
#include "unicode/utypes.h"
#include "unicode/locid.h"
#include "unicode/uloc.h"
#include "unicode/ures.h"
#include "cmemory.h"
#include "ucln_cmn.h"
#include "uassert.h"
#include "umutex.h"
#include "uresimp.h"
// C++ API ----------------------------------------------------------------- ***
U_NAMESPACE_BEGIN
static icu::Locale* availableLocaleList = NULL;
static int32_t availableLocaleListCount;
static icu::UInitOnce gInitOnce = U_INITONCE_INITIALIZER;
U_NAMESPACE_END
U_CDECL_BEGIN
static UBool U_CALLCONV locale_available_cleanup(void)
{
U_NAMESPACE_USE
if (availableLocaleList) {
delete []availableLocaleList;
availableLocaleList = NULL;
}
availableLocaleListCount = 0;
gInitOnce.reset();
return TRUE;
}
U_CDECL_END
U_NAMESPACE_BEGIN
void U_CALLCONV locale_available_init() {
// This function is a friend of class Locale.
// This function is only invoked via umtx_initOnce().
// for now, there is a hardcoded list, so just walk through that list and set it up.
// Note: this function is a friend of class Locale.
availableLocaleListCount = uloc_countAvailable();
if(availableLocaleListCount) {
availableLocaleList = new Locale[availableLocaleListCount];
}
if (availableLocaleList == NULL) {
availableLocaleListCount= 0;
}
for (int32_t locCount=availableLocaleListCount-1; locCount>=0; --locCount) {
availableLocaleList[locCount].setFromPOSIXID(uloc_getAvailable(locCount));
}
ucln_common_registerCleanup(UCLN_COMMON_LOCALE_AVAILABLE, locale_available_cleanup);
}
const Locale* U_EXPORT2
Locale::getAvailableLocales(int32_t& count)
{
umtx_initOnce(gInitOnce, &locale_available_init);
count = availableLocaleListCount;
return availableLocaleList;
}
U_NAMESPACE_END
// C API ------------------------------------------------------------------- ***
U_NAMESPACE_USE
/* ### Constants **************************************************/
/* These strings describe the resources we attempt to load from
the locale ResourceBundle data file.*/
static const char _kIndexLocaleName[] = "res_index";
static const char _kIndexTag[] = "InstalledLocales";
static char** _installedLocales = NULL;
static int32_t _installedLocalesCount = 0;
static icu::UInitOnce _installedLocalesInitOnce;
/* ### Get available **************************************************/
static UBool U_CALLCONV uloc_cleanup(void) {
char ** temp;
if (_installedLocales) {
temp = _installedLocales;
_installedLocales = NULL;
_installedLocalesCount = 0;
_installedLocalesInitOnce.reset();
uprv_free(temp);
}
return TRUE;
}
// Load Installed Locales. This function will be called exactly once
// via the initOnce mechanism.
static void U_CALLCONV loadInstalledLocales() {
UResourceBundle *indexLocale = NULL;
UResourceBundle installed;
UErrorCode status = U_ZERO_ERROR;
int32_t i = 0;
int32_t localeCount;
U_ASSERT(_installedLocales == NULL);
U_ASSERT(_installedLocalesCount == 0);
_installedLocalesCount = 0;
ures_initStackObject(&installed);
indexLocale = ures_openDirect(NULL, _kIndexLocaleName, &status);
ures_getByKey(indexLocale, _kIndexTag, &installed, &status);
if(U_SUCCESS(status)) {
localeCount = ures_getSize(&installed);
_installedLocales = (char **) uprv_malloc(sizeof(char*) * (localeCount+1));
if (_installedLocales != NULL) {
ures_resetIterator(&installed);
while(ures_hasNext(&installed)) {
ures_getNextString(&installed, NULL, (const char **)&_installedLocales[i++], &status);
}
_installedLocales[i] = NULL;
_installedLocalesCount = localeCount;
ucln_common_registerCleanup(UCLN_COMMON_ULOC, uloc_cleanup);
}
}
ures_close(&installed);
ures_close(indexLocale);
}
static void _load_installedLocales()
{
umtx_initOnce(_installedLocalesInitOnce, &loadInstalledLocales);
}
U_CAPI const char* U_EXPORT2
uloc_getAvailable(int32_t offset)
{
_load_installedLocales();
if (offset > _installedLocalesCount)
return NULL;
return _installedLocales[offset];
}
U_CAPI int32_t U_EXPORT2
uloc_countAvailable()
{
_load_installedLocales();
return _installedLocalesCount;
}

View File

@ -0,0 +1,53 @@
/*
**********************************************************************
* Copyright (c) 2004-2014, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* Author: Alan Liu
* Created: January 16 2004
* Since: ICU 2.8
**********************************************************************
*/
#include "locbased.h"
#include "cstring.h"
U_NAMESPACE_BEGIN
Locale LocaleBased::getLocale(ULocDataLocaleType type, UErrorCode& status) const {
const char* id = getLocaleID(type, status);
return Locale((id != 0) ? id : "");
}
const char* LocaleBased::getLocaleID(ULocDataLocaleType type, UErrorCode& status) const {
if (U_FAILURE(status)) {
return NULL;
}
switch(type) {
case ULOC_VALID_LOCALE:
return valid;
case ULOC_ACTUAL_LOCALE:
return actual;
default:
status = U_ILLEGAL_ARGUMENT_ERROR;
return NULL;
}
}
void LocaleBased::setLocaleIDs(const char* validID, const char* actualID) {
if (validID != 0) {
uprv_strncpy(valid, validID, ULOC_FULLNAME_CAPACITY);
valid[ULOC_FULLNAME_CAPACITY-1] = 0; // always terminate
}
if (actualID != 0) {
uprv_strncpy(actual, actualID, ULOC_FULLNAME_CAPACITY);
actual[ULOC_FULLNAME_CAPACITY-1] = 0; // always terminate
}
}
void LocaleBased::setLocaleIDs(const Locale& validID, const Locale& actualID) {
uprv_strcpy(valid, validID.getName());
uprv_strcpy(actual, actualID.getName());
}
U_NAMESPACE_END

View File

@ -0,0 +1,105 @@
/*
**********************************************************************
* Copyright (c) 2004-2014, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* Author: Alan Liu
* Created: January 16 2004
* Since: ICU 2.8
**********************************************************************
*/
#ifndef LOCBASED_H
#define LOCBASED_H
#include "unicode/locid.h"
#include "unicode/uobject.h"
/**
* Macro to declare a locale LocaleBased wrapper object for the given
* object, which must have two members named `validLocale' and
* `actualLocale' of size ULOC_FULLNAME_CAPACITY
*/
#define U_LOCALE_BASED(varname, objname) \
LocaleBased varname((objname).validLocale, (objname).actualLocale);
U_NAMESPACE_BEGIN
/**
* A utility class that unifies the implementation of getLocale() by
* various ICU services. This class is likely to be removed in the
* ICU 3.0 time frame in favor of an integrated approach with the
* services framework.
* @since ICU 2.8
*/
class U_COMMON_API LocaleBased : public UMemory {
public:
/**
* Construct a LocaleBased wrapper around the two pointers. These
* will be aliased for the lifetime of this object.
*/
inline LocaleBased(char* validAlias, char* actualAlias);
/**
* Construct a LocaleBased wrapper around the two const pointers.
* These will be aliased for the lifetime of this object.
*/
inline LocaleBased(const char* validAlias, const char* actualAlias);
/**
* Return locale meta-data for the service object wrapped by this
* object. Either the valid or the actual locale may be
* retrieved.
* @param type either ULOC_VALID_LOCALE or ULOC_ACTUAL_LOCALE
* @param status input-output error code
* @return the indicated locale
*/
Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const;
/**
* Return the locale ID for the service object wrapped by this
* object. Either the valid or the actual locale may be
* retrieved.
* @param type either ULOC_VALID_LOCALE or ULOC_ACTUAL_LOCALE
* @param status input-output error code
* @return the indicated locale ID
*/
const char* getLocaleID(ULocDataLocaleType type, UErrorCode& status) const;
/**
* Set the locale meta-data for the service object wrapped by this
* object. If either parameter is zero, it is ignored.
* @param valid the ID of the valid locale
* @param actual the ID of the actual locale
*/
void setLocaleIDs(const char* valid, const char* actual);
/**
* Set the locale meta-data for the service object wrapped by this
* object.
* @param valid the ID of the valid locale
* @param actual the ID of the actual locale
*/
void setLocaleIDs(const Locale& valid, const Locale& actual);
private:
char* valid;
char* actual;
};
inline LocaleBased::LocaleBased(char* validAlias, char* actualAlias) :
valid(validAlias), actual(actualAlias) {
}
inline LocaleBased::LocaleBased(const char* validAlias,
const char* actualAlias) :
// ugh: cast away const
valid((char*)validAlias), actual((char*)actualAlias) {
}
U_NAMESPACE_END
#endif

View File

@ -0,0 +1,882 @@
/*
*******************************************************************************
*
* Copyright (C) 1997-2013, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: locdispnames.cpp
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2010feb25
* created by: Markus W. Scherer
*
* Code for locale display names, separated out from other .cpp files
* that then do not depend on resource bundle code and display name data.
*/
#include "unicode/utypes.h"
#include "unicode/brkiter.h"
#include "unicode/locid.h"
#include "unicode/uloc.h"
#include "unicode/ures.h"
#include "unicode/ustring.h"
#include "cmemory.h"
#include "cstring.h"
#include "putilimp.h"
#include "ulocimp.h"
#include "uresimp.h"
#include "ureslocs.h"
#include "ustr_imp.h"
// C++ API ----------------------------------------------------------------- ***
U_NAMESPACE_BEGIN
UnicodeString&
Locale::getDisplayLanguage(UnicodeString& dispLang) const
{
return this->getDisplayLanguage(getDefault(), dispLang);
}
/*We cannot make any assumptions on the size of the output display strings
* Yet, since we are calling through to a C API, we need to set limits on
* buffer size. For all the following getDisplay functions we first attempt
* to fill up a stack allocated buffer. If it is to small we heap allocated
* the exact buffer we need copy it to the UnicodeString and delete it*/
UnicodeString&
Locale::getDisplayLanguage(const Locale &displayLocale,
UnicodeString &result) const {
UChar *buffer;
UErrorCode errorCode=U_ZERO_ERROR;
int32_t length;
buffer=result.getBuffer(ULOC_FULLNAME_CAPACITY);
if(buffer==0) {
result.truncate(0);
return result;
}
length=uloc_getDisplayLanguage(fullName, displayLocale.fullName,
buffer, result.getCapacity(),
&errorCode);
result.releaseBuffer(U_SUCCESS(errorCode) ? length : 0);
if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
buffer=result.getBuffer(length);
if(buffer==0) {
result.truncate(0);
return result;
}
errorCode=U_ZERO_ERROR;
length=uloc_getDisplayLanguage(fullName, displayLocale.fullName,
buffer, result.getCapacity(),
&errorCode);
result.releaseBuffer(U_SUCCESS(errorCode) ? length : 0);
}
return result;
}
UnicodeString&
Locale::getDisplayScript(UnicodeString& dispScript) const
{
return this->getDisplayScript(getDefault(), dispScript);
}
UnicodeString&
Locale::getDisplayScript(const Locale &displayLocale,
UnicodeString &result) const {
UChar *buffer;
UErrorCode errorCode=U_ZERO_ERROR;
int32_t length;
buffer=result.getBuffer(ULOC_FULLNAME_CAPACITY);
if(buffer==0) {
result.truncate(0);
return result;
}
length=uloc_getDisplayScript(fullName, displayLocale.fullName,
buffer, result.getCapacity(),
&errorCode);
result.releaseBuffer(U_SUCCESS(errorCode) ? length : 0);
if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
buffer=result.getBuffer(length);
if(buffer==0) {
result.truncate(0);
return result;
}
errorCode=U_ZERO_ERROR;
length=uloc_getDisplayScript(fullName, displayLocale.fullName,
buffer, result.getCapacity(),
&errorCode);
result.releaseBuffer(U_SUCCESS(errorCode) ? length : 0);
}
return result;
}
UnicodeString&
Locale::getDisplayCountry(UnicodeString& dispCntry) const
{
return this->getDisplayCountry(getDefault(), dispCntry);
}
UnicodeString&
Locale::getDisplayCountry(const Locale &displayLocale,
UnicodeString &result) const {
UChar *buffer;
UErrorCode errorCode=U_ZERO_ERROR;
int32_t length;
buffer=result.getBuffer(ULOC_FULLNAME_CAPACITY);
if(buffer==0) {
result.truncate(0);
return result;
}
length=uloc_getDisplayCountry(fullName, displayLocale.fullName,
buffer, result.getCapacity(),
&errorCode);
result.releaseBuffer(U_SUCCESS(errorCode) ? length : 0);
if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
buffer=result.getBuffer(length);
if(buffer==0) {
result.truncate(0);
return result;
}
errorCode=U_ZERO_ERROR;
length=uloc_getDisplayCountry(fullName, displayLocale.fullName,
buffer, result.getCapacity(),
&errorCode);
result.releaseBuffer(U_SUCCESS(errorCode) ? length : 0);
}
return result;
}
UnicodeString&
Locale::getDisplayVariant(UnicodeString& dispVar) const
{
return this->getDisplayVariant(getDefault(), dispVar);
}
UnicodeString&
Locale::getDisplayVariant(const Locale &displayLocale,
UnicodeString &result) const {
UChar *buffer;
UErrorCode errorCode=U_ZERO_ERROR;
int32_t length;
buffer=result.getBuffer(ULOC_FULLNAME_CAPACITY);
if(buffer==0) {
result.truncate(0);
return result;
}
length=uloc_getDisplayVariant(fullName, displayLocale.fullName,
buffer, result.getCapacity(),
&errorCode);
result.releaseBuffer(U_SUCCESS(errorCode) ? length : 0);
if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
buffer=result.getBuffer(length);
if(buffer==0) {
result.truncate(0);
return result;
}
errorCode=U_ZERO_ERROR;
length=uloc_getDisplayVariant(fullName, displayLocale.fullName,
buffer, result.getCapacity(),
&errorCode);
result.releaseBuffer(U_SUCCESS(errorCode) ? length : 0);
}
return result;
}
UnicodeString&
Locale::getDisplayName( UnicodeString& name ) const
{
return this->getDisplayName(getDefault(), name);
}
UnicodeString&
Locale::getDisplayName(const Locale &displayLocale,
UnicodeString &result) const {
UChar *buffer;
UErrorCode errorCode=U_ZERO_ERROR;
int32_t length;
buffer=result.getBuffer(ULOC_FULLNAME_CAPACITY);
if(buffer==0) {
result.truncate(0);
return result;
}
length=uloc_getDisplayName(fullName, displayLocale.fullName,
buffer, result.getCapacity(),
&errorCode);
result.releaseBuffer(U_SUCCESS(errorCode) ? length : 0);
if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
buffer=result.getBuffer(length);
if(buffer==0) {
result.truncate(0);
return result;
}
errorCode=U_ZERO_ERROR;
length=uloc_getDisplayName(fullName, displayLocale.fullName,
buffer, result.getCapacity(),
&errorCode);
result.releaseBuffer(U_SUCCESS(errorCode) ? length : 0);
}
return result;
}
#if ! UCONFIG_NO_BREAK_ITERATION
// -------------------------------------
// Gets the objectLocale display name in the default locale language.
UnicodeString& U_EXPORT2
BreakIterator::getDisplayName(const Locale& objectLocale,
UnicodeString& name)
{
return objectLocale.getDisplayName(name);
}
// -------------------------------------
// Gets the objectLocale display name in the displayLocale language.
UnicodeString& U_EXPORT2
BreakIterator::getDisplayName(const Locale& objectLocale,
const Locale& displayLocale,
UnicodeString& name)
{
return objectLocale.getDisplayName(displayLocale, name);
}
#endif
U_NAMESPACE_END
// C API ------------------------------------------------------------------- ***
U_NAMESPACE_USE
/* ### Constants **************************************************/
/* These strings describe the resources we attempt to load from
the locale ResourceBundle data file.*/
static const char _kLanguages[] = "Languages";
static const char _kScripts[] = "Scripts";
static const char _kScriptsStandAlone[] = "Scripts%stand-alone";
static const char _kCountries[] = "Countries";
static const char _kVariants[] = "Variants";
static const char _kKeys[] = "Keys";
static const char _kTypes[] = "Types";
//static const char _kRootName[] = "root";
static const char _kCurrency[] = "currency";
static const char _kCurrencies[] = "Currencies";
static const char _kLocaleDisplayPattern[] = "localeDisplayPattern";
static const char _kPattern[] = "pattern";
static const char _kSeparator[] = "separator";
/* ### Display name **************************************************/
static int32_t
_getStringOrCopyKey(const char *path, const char *locale,
const char *tableKey,
const char* subTableKey,
const char *itemKey,
const char *substitute,
UChar *dest, int32_t destCapacity,
UErrorCode *pErrorCode) {
const UChar *s = NULL;
int32_t length = 0;
if(itemKey==NULL) {
/* top-level item: normal resource bundle access */
UResourceBundle *rb;
rb=ures_open(path, locale, pErrorCode);
if(U_SUCCESS(*pErrorCode)) {
s=ures_getStringByKey(rb, tableKey, &length, pErrorCode);
/* see comment about closing rb near "return item;" in _res_getTableStringWithFallback() */
ures_close(rb);
}
} else {
/* Language code should not be a number. If it is, set the error code. */
if (!uprv_strncmp(tableKey, "Languages", 9) && uprv_strtol(itemKey, NULL, 10)) {
*pErrorCode = U_MISSING_RESOURCE_ERROR;
} else {
/* second-level item, use special fallback */
s=uloc_getTableStringWithFallback(path, locale,
tableKey,
subTableKey,
itemKey,
&length,
pErrorCode);
}
}
if(U_SUCCESS(*pErrorCode)) {
int32_t copyLength=uprv_min(length, destCapacity);
if(copyLength>0 && s != NULL) {
u_memcpy(dest, s, copyLength);
}
} else {
/* no string from a resource bundle: convert the substitute */
length=(int32_t)uprv_strlen(substitute);
u_charsToUChars(substitute, dest, uprv_min(length, destCapacity));
*pErrorCode=U_USING_DEFAULT_WARNING;
}
return u_terminateUChars(dest, destCapacity, length, pErrorCode);
}
typedef int32_t U_CALLCONV UDisplayNameGetter(const char *, char *, int32_t, UErrorCode *);
static int32_t
_getDisplayNameForComponent(const char *locale,
const char *displayLocale,
UChar *dest, int32_t destCapacity,
UDisplayNameGetter *getter,
const char *tag,
UErrorCode *pErrorCode) {
char localeBuffer[ULOC_FULLNAME_CAPACITY*4];
int32_t length;
UErrorCode localStatus;
const char* root = NULL;
/* argument checking */
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
return 0;
}
if(destCapacity<0 || (destCapacity>0 && dest==NULL)) {
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
return 0;
}
localStatus = U_ZERO_ERROR;
length=(*getter)(locale, localeBuffer, sizeof(localeBuffer), &localStatus);
if(U_FAILURE(localStatus) || localStatus==U_STRING_NOT_TERMINATED_WARNING) {
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
return 0;
}
if(length==0) {
return u_terminateUChars(dest, destCapacity, 0, pErrorCode);
}
root = tag == _kCountries ? U_ICUDATA_REGION : U_ICUDATA_LANG;
return _getStringOrCopyKey(root, displayLocale,
tag, NULL, localeBuffer,
localeBuffer,
dest, destCapacity,
pErrorCode);
}
U_CAPI int32_t U_EXPORT2
uloc_getDisplayLanguage(const char *locale,
const char *displayLocale,
UChar *dest, int32_t destCapacity,
UErrorCode *pErrorCode) {
return _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
uloc_getLanguage, _kLanguages, pErrorCode);
}
U_CAPI int32_t U_EXPORT2
uloc_getDisplayScript(const char* locale,
const char* displayLocale,
UChar *dest, int32_t destCapacity,
UErrorCode *pErrorCode)
{
UErrorCode err = U_ZERO_ERROR;
int32_t res = _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
uloc_getScript, _kScriptsStandAlone, &err);
if ( err == U_USING_DEFAULT_WARNING ) {
return _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
uloc_getScript, _kScripts, pErrorCode);
} else {
*pErrorCode = err;
return res;
}
}
U_INTERNAL int32_t U_EXPORT2
uloc_getDisplayScriptInContext(const char* locale,
const char* displayLocale,
UChar *dest, int32_t destCapacity,
UErrorCode *pErrorCode)
{
return _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
uloc_getScript, _kScripts, pErrorCode);
}
U_CAPI int32_t U_EXPORT2
uloc_getDisplayCountry(const char *locale,
const char *displayLocale,
UChar *dest, int32_t destCapacity,
UErrorCode *pErrorCode) {
return _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
uloc_getCountry, _kCountries, pErrorCode);
}
/*
* TODO separate variant1_variant2_variant3...
* by getting each tag's display string and concatenating them with ", "
* in between - similar to uloc_getDisplayName()
*/
U_CAPI int32_t U_EXPORT2
uloc_getDisplayVariant(const char *locale,
const char *displayLocale,
UChar *dest, int32_t destCapacity,
UErrorCode *pErrorCode) {
return _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
uloc_getVariant, _kVariants, pErrorCode);
}
/* Instead of having a separate pass for 'special' patterns, reintegrate the two
* so we don't get bitten by preflight bugs again. We can be reasonably efficient
* without two separate code paths, this code isn't that performance-critical.
*
* This code is general enough to deal with patterns that have a prefix or swap the
* language and remainder components, since we gave developers enough rope to do such
* things if they futz with the pattern data. But since we don't give them a way to
* specify a pattern for arbitrary combinations of components, there's not much use in
* that. I don't think our data includes such patterns, the only variable I know if is
* whether there is a space before the open paren, or not. Oh, and zh uses different
* chars than the standard open/close paren (which ja and ko use, btw).
*/
U_CAPI int32_t U_EXPORT2
uloc_getDisplayName(const char *locale,
const char *displayLocale,
UChar *dest, int32_t destCapacity,
UErrorCode *pErrorCode)
{
static const UChar defaultSeparator[9] = { 0x007b, 0x0030, 0x007d, 0x002c, 0x0020, 0x007b, 0x0031, 0x007d, 0x0000 }; /* "{0}, {1}" */
static const UChar sub0[4] = { 0x007b, 0x0030, 0x007d , 0x0000 } ; /* {0} */
static const UChar sub1[4] = { 0x007b, 0x0031, 0x007d , 0x0000 } ; /* {1} */
static const int32_t subLen = 3;
static const UChar defaultPattern[10] = {
0x007b, 0x0030, 0x007d, 0x0020, 0x0028, 0x007b, 0x0031, 0x007d, 0x0029, 0x0000
}; /* {0} ({1}) */
static const int32_t defaultPatLen = 9;
static const int32_t defaultSub0Pos = 0;
static const int32_t defaultSub1Pos = 5;
int32_t length; /* of formatted result */
const UChar *separator;
int32_t sepLen = 0;
const UChar *pattern;
int32_t patLen = 0;
int32_t sub0Pos, sub1Pos;
UChar formatOpenParen = 0x0028; // (
UChar formatReplaceOpenParen = 0x005B; // [
UChar formatCloseParen = 0x0029; // )
UChar formatReplaceCloseParen = 0x005D; // ]
UBool haveLang = TRUE; /* assume true, set false if we find we don't have
a lang component in the locale */
UBool haveRest = TRUE; /* assume true, set false if we find we don't have
any other component in the locale */
UBool retry = FALSE; /* set true if we need to retry, see below */
int32_t langi = 0; /* index of the language substitution (0 or 1), virtually always 0 */
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
return 0;
}
if(destCapacity<0 || (destCapacity>0 && dest==NULL)) {
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
return 0;
}
{
UErrorCode status = U_ZERO_ERROR;
UResourceBundle* locbundle=ures_open(U_ICUDATA_LANG, displayLocale, &status);
UResourceBundle* dspbundle=ures_getByKeyWithFallback(locbundle, _kLocaleDisplayPattern,
NULL, &status);
separator=ures_getStringByKeyWithFallback(dspbundle, _kSeparator, &sepLen, &status);
pattern=ures_getStringByKeyWithFallback(dspbundle, _kPattern, &patLen, &status);
ures_close(dspbundle);
ures_close(locbundle);
}
/* If we couldn't find any data, then use the defaults */
if(sepLen == 0) {
separator = defaultSeparator;
}
/* #10244: Even though separator is now a pattern, it is awkward to handle it as such
* here since we are trying to build the display string in place in the dest buffer,
* and to handle it as a pattern would entail having separate storage for the
* substrings that need to be combined (the first of which may be the result of
* previous such combinations). So for now we continue to treat the portion between
* {0} and {1} as a string to be appended when joining substrings, ignoring anything
* that is before {0} or after {1} (no existing separator pattern has any such thing).
* This is similar to how pattern is handled below.
*/
{
UChar *p0=u_strstr(separator, sub0);
UChar *p1=u_strstr(separator, sub1);
if (p0==NULL || p1==NULL || p1<p0) {
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
return 0;
}
separator = (const UChar *)p0 + subLen;
sepLen = p1 - separator;
}
if(patLen==0 || (patLen==defaultPatLen && !u_strncmp(pattern, defaultPattern, patLen))) {
pattern=defaultPattern;
patLen=defaultPatLen;
sub0Pos=defaultSub0Pos;
sub1Pos=defaultSub1Pos;
// use default formatOpenParen etc. set above
} else { /* non-default pattern */
UChar *p0=u_strstr(pattern, sub0);
UChar *p1=u_strstr(pattern, sub1);
if (p0==NULL || p1==NULL) {
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
return 0;
}
sub0Pos=p0-pattern;
sub1Pos=p1-pattern;
if (sub1Pos < sub0Pos) { /* a very odd pattern */
int32_t t=sub0Pos; sub0Pos=sub1Pos; sub1Pos=t;
langi=1;
}
if (u_strchr(pattern, 0xFF08) != NULL) {
formatOpenParen = 0xFF08; // fullwidth (
formatReplaceOpenParen = 0xFF3B; // fullwidth [
formatCloseParen = 0xFF09; // fullwidth )
formatReplaceCloseParen = 0xFF3D; // fullwidth ]
}
}
/* We loop here because there is one case in which after the first pass we could need to
* reextract the data. If there's initial padding before the first element, we put in
* the padding and then write that element. If it turns out there's no second element,
* we didn't need the padding. If we do need the data (no preflight), and the first element
* would have fit but for the padding, we need to reextract. In this case (only) we
* adjust the parameters so padding is not added, and repeat.
*/
do {
UChar* p=dest;
int32_t patPos=0; /* position in the pattern, used for non-substitution portions */
int32_t langLen=0; /* length of language substitution */
int32_t langPos=0; /* position in output of language substitution */
int32_t restLen=0; /* length of 'everything else' substitution */
int32_t restPos=0; /* position in output of 'everything else' substitution */
UEnumeration* kenum = NULL; /* keyword enumeration */
/* prefix of pattern, extremely likely to be empty */
if(sub0Pos) {
if(destCapacity >= sub0Pos) {
while (patPos < sub0Pos) {
*p++ = pattern[patPos++];
}
} else {
patPos=sub0Pos;
}
length=sub0Pos;
} else {
length=0;
}
for(int32_t subi=0,resti=0;subi<2;) { /* iterate through patterns 0 and 1*/
UBool subdone = FALSE; /* set true when ready to move to next substitution */
/* prep p and cap for calls to get display components, pin cap to 0 since
they complain if cap is negative */
int32_t cap=destCapacity-length;
if (cap <= 0) {
cap=0;
} else {
p=dest+length;
}
if (subi == langi) { /* {0}*/
if(haveLang) {
langPos=length;
langLen=uloc_getDisplayLanguage(locale, displayLocale, p, cap, pErrorCode);
length+=langLen;
haveLang=langLen>0;
}
subdone=TRUE;
} else { /* {1} */
if(!haveRest) {
subdone=TRUE;
} else {
int32_t len; /* length of component (plus other stuff) we just fetched */
switch(resti++) {
case 0:
restPos=length;
len=uloc_getDisplayScriptInContext(locale, displayLocale, p, cap, pErrorCode);
break;
case 1:
len=uloc_getDisplayCountry(locale, displayLocale, p, cap, pErrorCode);
break;
case 2:
len=uloc_getDisplayVariant(locale, displayLocale, p, cap, pErrorCode);
break;
case 3:
kenum = uloc_openKeywords(locale, pErrorCode);
/* fall through */
default: {
const char* kw=uenum_next(kenum, &len, pErrorCode);
if (kw == NULL) {
uenum_close(kenum);
len=0; /* mark that we didn't add a component */
subdone=TRUE;
} else {
/* incorporating this behavior into the loop made it even more complex,
so just special case it here */
len = uloc_getDisplayKeyword(kw, displayLocale, p, cap, pErrorCode);
if(len) {
if(len < cap) {
p[len]=0x3d; /* '=', assume we'll need it */
}
len+=1;
/* adjust for call to get keyword */
cap-=len;
if(cap <= 0) {
cap=0;
} else {
p+=len;
}
}
/* reset for call below */
if(*pErrorCode == U_BUFFER_OVERFLOW_ERROR) {
*pErrorCode=U_ZERO_ERROR;
}
int32_t vlen = uloc_getDisplayKeywordValue(locale, kw, displayLocale,
p, cap, pErrorCode);
if(len) {
if(vlen==0) {
--len; /* remove unneeded '=' */
}
/* restore cap and p to what they were at start */
cap=destCapacity-length;
if(cap <= 0) {
cap=0;
} else {
p=dest+length;
}
}
len+=vlen; /* total we added for key + '=' + value */
}
} break;
} /* end switch */
if (len>0) {
/* we addeed a component, so add separator and write it if there's room. */
if(len+sepLen<=cap) {
const UChar * plimit = p + len;
for (; p < plimit; p++) {
if (*p == formatOpenParen) {
*p = formatReplaceOpenParen;
} else if (*p == formatCloseParen) {
*p = formatReplaceCloseParen;
}
}
for(int32_t i=0;i<sepLen;++i) {
*p++=separator[i];
}
}
length+=len+sepLen;
} else if(subdone) {
/* remove separator if we added it */
if (length!=restPos) {
length-=sepLen;
}
restLen=length-restPos;
haveRest=restLen>0;
}
}
}
if(*pErrorCode == U_BUFFER_OVERFLOW_ERROR) {
*pErrorCode=U_ZERO_ERROR;
}
if(subdone) {
if(haveLang && haveRest) {
/* append internal portion of pattern, the first time,
or last portion of pattern the second time */
int32_t padLen;
patPos+=subLen;
padLen=(subi==0 ? sub1Pos : patLen)-patPos;
if(length+padLen < destCapacity) {
p=dest+length;
for(int32_t i=0;i<padLen;++i) {
*p++=pattern[patPos++];
}
} else {
patPos+=padLen;
}
length+=padLen;
} else if(subi==0) {
/* don't have first component, reset for second component */
sub0Pos=0;
length=0;
} else if(length>0) {
/* true length is the length of just the component we got. */
length=haveLang?langLen:restLen;
if(dest && sub0Pos!=0) {
if (sub0Pos+length<=destCapacity) {
/* first component not at start of result,
but we have full component in buffer. */
u_memmove(dest, dest+(haveLang?langPos:restPos), length);
} else {
/* would have fit, but didn't because of pattern prefix. */
sub0Pos=0; /* stops initial padding (and a second retry,
so we won't end up here again) */
retry=TRUE;
}
}
}
++subi; /* move on to next substitution */
}
}
} while(retry);
return u_terminateUChars(dest, destCapacity, length, pErrorCode);
}
U_CAPI int32_t U_EXPORT2
uloc_getDisplayKeyword(const char* keyword,
const char* displayLocale,
UChar* dest,
int32_t destCapacity,
UErrorCode* status){
/* argument checking */
if(status==NULL || U_FAILURE(*status)) {
return 0;
}
if(destCapacity<0 || (destCapacity>0 && dest==NULL)) {
*status=U_ILLEGAL_ARGUMENT_ERROR;
return 0;
}
/* pass itemKey=NULL to look for a top-level item */
return _getStringOrCopyKey(U_ICUDATA_LANG, displayLocale,
_kKeys, NULL,
keyword,
keyword,
dest, destCapacity,
status);
}
#define UCURRENCY_DISPLAY_NAME_INDEX 1
U_CAPI int32_t U_EXPORT2
uloc_getDisplayKeywordValue( const char* locale,
const char* keyword,
const char* displayLocale,
UChar* dest,
int32_t destCapacity,
UErrorCode* status){
char keywordValue[ULOC_FULLNAME_CAPACITY*4];
int32_t capacity = ULOC_FULLNAME_CAPACITY*4;
int32_t keywordValueLen =0;
/* argument checking */
if(status==NULL || U_FAILURE(*status)) {
return 0;
}
if(destCapacity<0 || (destCapacity>0 && dest==NULL)) {
*status=U_ILLEGAL_ARGUMENT_ERROR;
return 0;
}
/* get the keyword value */
keywordValue[0]=0;
keywordValueLen = uloc_getKeywordValue(locale, keyword, keywordValue, capacity, status);
/*
* if the keyword is equal to currency .. then to get the display name
* we need to do the fallback ourselves
*/
if(uprv_stricmp(keyword, _kCurrency)==0){
int32_t dispNameLen = 0;
const UChar *dispName = NULL;
UResourceBundle *bundle = ures_open(U_ICUDATA_CURR, displayLocale, status);
UResourceBundle *currencies = ures_getByKey(bundle, _kCurrencies, NULL, status);
UResourceBundle *currency = ures_getByKeyWithFallback(currencies, keywordValue, NULL, status);
dispName = ures_getStringByIndex(currency, UCURRENCY_DISPLAY_NAME_INDEX, &dispNameLen, status);
/*close the bundles */
ures_close(currency);
ures_close(currencies);
ures_close(bundle);
if(U_FAILURE(*status)){
if(*status == U_MISSING_RESOURCE_ERROR){
/* we just want to write the value over if nothing is available */
*status = U_USING_DEFAULT_WARNING;
}else{
return 0;
}
}
/* now copy the dispName over if not NULL */
if(dispName != NULL){
if(dispNameLen <= destCapacity){
uprv_memcpy(dest, dispName, dispNameLen * U_SIZEOF_UCHAR);
return u_terminateUChars(dest, destCapacity, dispNameLen, status);
}else{
*status = U_BUFFER_OVERFLOW_ERROR;
return dispNameLen;
}
}else{
/* we have not found the display name for the value .. just copy over */
if(keywordValueLen <= destCapacity){
u_charsToUChars(keywordValue, dest, keywordValueLen);
return u_terminateUChars(dest, destCapacity, keywordValueLen, status);
}else{
*status = U_BUFFER_OVERFLOW_ERROR;
return keywordValueLen;
}
}
}else{
return _getStringOrCopyKey(U_ICUDATA_LANG, displayLocale,
_kTypes, keyword,
keywordValue,
keywordValue,
dest, destCapacity,
status);
}
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,37 @@
/*
******************************************************************************
*
* Copyright (C) 1996-2013, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
*
* File locmap.h : Locale Mapping Classes
*
*
* Created by: Helena Shih
*
* Modification History:
*
* Date Name Description
* 3/11/97 aliu Added setId().
* 4/20/99 Madhu Added T_convertToPosix()
* 09/18/00 george Removed the memory leaks.
* 08/23/01 george Convert to C
*============================================================================
*/
#ifndef LOCMAP_H
#define LOCMAP_H
#include "unicode/utypes.h"
#define LANGUAGE_LCID(hostID) (uint16_t)(0x03FF & hostID)
U_CAPI int32_t uprv_convertToPosix(uint32_t hostid, char* posixID, int32_t posixIDCapacity, UErrorCode* status);
/* Don't call this function directly. Use uloc_getLCID instead. */
U_CAPI uint32_t uprv_convertToLCID(const char *langID, const char* posixID, UErrorCode* status);
#endif /* LOCMAP_H */

View File

@ -0,0 +1,223 @@
/*
*******************************************************************************
*
* Copyright (C) 1997-2012, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: loclikely.cpp
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2010feb25
* created by: Markus W. Scherer
*
* Code for miscellaneous locale-related resource bundle data access,
* separated out from other .cpp files
* that then do not depend on resource bundle code and this data.
*/
#include "unicode/utypes.h"
#include "unicode/putil.h"
#include "unicode/uloc.h"
#include "unicode/ures.h"
#include "cstring.h"
#include "ulocimp.h"
#include "uresimp.h"
/*
* Lookup a resource bundle table item with fallback on the table level.
* Regular resource bundle lookups perform fallback to parent locale bundles
* and eventually the root bundle, but only for top-level items.
* This function takes the name of a top-level table and of an item in that table
* and performs a lookup of both, falling back until a bundle contains a table
* with this item.
*
* Note: Only the opening of entire bundles falls back through the default locale
* before root. Once a bundle is open, item lookups do not go through the
* default locale because that would result in a mix of languages that is
* unpredictable to the programmer and most likely useless.
*/
U_CAPI const UChar * U_EXPORT2
uloc_getTableStringWithFallback(const char *path, const char *locale,
const char *tableKey, const char *subTableKey,
const char *itemKey,
int32_t *pLength,
UErrorCode *pErrorCode)
{
/* char localeBuffer[ULOC_FULLNAME_CAPACITY*4];*/
UResourceBundle *rb=NULL, table, subTable;
const UChar *item=NULL;
UErrorCode errorCode;
char explicitFallbackName[ULOC_FULLNAME_CAPACITY] = {0};
/*
* open the bundle for the current locale
* this falls back through the locale's chain to root
*/
errorCode=U_ZERO_ERROR;
rb=ures_open(path, locale, &errorCode);
if(U_FAILURE(errorCode)) {
/* total failure, not even root could be opened */
*pErrorCode=errorCode;
return NULL;
} else if(errorCode==U_USING_DEFAULT_WARNING ||
(errorCode==U_USING_FALLBACK_WARNING && *pErrorCode!=U_USING_DEFAULT_WARNING)
) {
/* set the "strongest" error code (success->fallback->default->failure) */
*pErrorCode=errorCode;
}
for(;;){
ures_initStackObject(&table);
ures_initStackObject(&subTable);
ures_getByKeyWithFallback(rb, tableKey, &table, &errorCode);
if (subTableKey != NULL) {
/*
ures_getByKeyWithFallback(&table,subTableKey, &subTable, &errorCode);
item = ures_getStringByKeyWithFallback(&subTable, itemKey, pLength, &errorCode);
if(U_FAILURE(errorCode)){
*pErrorCode = errorCode;
}
break;*/
ures_getByKeyWithFallback(&table,subTableKey, &table, &errorCode);
}
if(U_SUCCESS(errorCode)){
item = ures_getStringByKeyWithFallback(&table, itemKey, pLength, &errorCode);
if(U_FAILURE(errorCode)){
const char* replacement = NULL;
*pErrorCode = errorCode; /*save the errorCode*/
errorCode = U_ZERO_ERROR;
/* may be a deprecated code */
if(uprv_strcmp(tableKey, "Countries")==0){
replacement = uloc_getCurrentCountryID(itemKey);
}else if(uprv_strcmp(tableKey, "Languages")==0){
replacement = uloc_getCurrentLanguageID(itemKey);
}
/*pointer comparison is ok since uloc_getCurrentCountryID & uloc_getCurrentLanguageID return the key itself is replacement is not found*/
if(replacement!=NULL && itemKey != replacement){
item = ures_getStringByKeyWithFallback(&table, replacement, pLength, &errorCode);
if(U_SUCCESS(errorCode)){
*pErrorCode = errorCode;
break;
}
}
}else{
break;
}
}
if(U_FAILURE(errorCode)){
/* still can't figure out ?.. try the fallback mechanism */
int32_t len = 0;
const UChar* fallbackLocale = NULL;
*pErrorCode = errorCode;
errorCode = U_ZERO_ERROR;
fallbackLocale = ures_getStringByKeyWithFallback(&table, "Fallback", &len, &errorCode);
if(U_FAILURE(errorCode)){
*pErrorCode = errorCode;
break;
}
u_UCharsToChars(fallbackLocale, explicitFallbackName, len);
/* guard against recursive fallback */
if(uprv_strcmp(explicitFallbackName, locale)==0){
*pErrorCode = U_INTERNAL_PROGRAM_ERROR;
break;
}
ures_close(rb);
rb = ures_open(path, explicitFallbackName, &errorCode);
if(U_FAILURE(errorCode)){
*pErrorCode = errorCode;
break;
}
/* succeeded in opening the fallback bundle .. continue and try to fetch the item */
}else{
break;
}
}
/* done with the locale string - ready to close table and rb */
ures_close(&subTable);
ures_close(&table);
ures_close(rb);
return item;
}
static ULayoutType
_uloc_getOrientationHelper(const char* localeId,
const char* key,
UErrorCode *status)
{
ULayoutType result = ULOC_LAYOUT_UNKNOWN;
if (!U_FAILURE(*status)) {
int32_t length = 0;
char localeBuffer[ULOC_FULLNAME_CAPACITY];
uloc_canonicalize(localeId, localeBuffer, sizeof(localeBuffer), status);
if (!U_FAILURE(*status)) {
const UChar* const value =
uloc_getTableStringWithFallback(
NULL,
localeBuffer,
"layout",
NULL,
key,
&length,
status);
if (!U_FAILURE(*status) && length != 0) {
switch(value[0])
{
case 0x0062: /* 'b' */
result = ULOC_LAYOUT_BTT;
break;
case 0x006C: /* 'l' */
result = ULOC_LAYOUT_LTR;
break;
case 0x0072: /* 'r' */
result = ULOC_LAYOUT_RTL;
break;
case 0x0074: /* 't' */
result = ULOC_LAYOUT_TTB;
break;
default:
*status = U_INTERNAL_PROGRAM_ERROR;
break;
}
}
}
}
return result;
}
U_CAPI ULayoutType U_EXPORT2
uloc_getCharacterOrientation(const char* localeId,
UErrorCode *status)
{
return _uloc_getOrientationHelper(localeId, "characters", status);
}
/**
* Get the layout line orientation for the specified locale.
*
* @param localeID locale name
* @param status Error status
* @return an enum indicating the layout orientation for lines.
*/
U_CAPI ULayoutType U_EXPORT2
uloc_getLineOrientation(const char* localeId,
UErrorCode *status)
{
return _uloc_getOrientationHelper(localeId, "lines", status);
}

View File

@ -0,0 +1,273 @@
/*
*******************************************************************************
* Copyright (C) 2002-2014, International Business Machines Corporation and
* others. All Rights Reserved.
*******************************************************************************
*/
#include "unicode/utypes.h"
#if !UCONFIG_NO_SERVICE || !UCONFIG_NO_TRANSLITERATION
#include "unicode/resbund.h"
#include "cmemory.h"
#include "ustrfmt.h"
#include "locutil.h"
#include "charstr.h"
#include "ucln_cmn.h"
#include "uassert.h"
#include "umutex.h"
// see LocaleUtility::getAvailableLocaleNames
static icu::UInitOnce LocaleUtilityInitOnce = U_INITONCE_INITIALIZER;
static icu::Hashtable * LocaleUtility_cache = NULL;
#define UNDERSCORE_CHAR ((UChar)0x005f)
#define AT_SIGN_CHAR ((UChar)64)
#define PERIOD_CHAR ((UChar)46)
/*
******************************************************************
*/
/**
* Release all static memory held by Locale Utility.
*/
U_CDECL_BEGIN
static UBool U_CALLCONV service_cleanup(void) {
if (LocaleUtility_cache) {
delete LocaleUtility_cache;
LocaleUtility_cache = NULL;
}
return TRUE;
}
static void U_CALLCONV locale_utility_init(UErrorCode &status) {
using namespace icu;
U_ASSERT(LocaleUtility_cache == NULL);
ucln_common_registerCleanup(UCLN_COMMON_SERVICE, service_cleanup);
LocaleUtility_cache = new Hashtable(status);
if (U_FAILURE(status)) {
delete LocaleUtility_cache;
LocaleUtility_cache = NULL;
return;
}
if (LocaleUtility_cache == NULL) {
status = U_MEMORY_ALLOCATION_ERROR;
return;
}
LocaleUtility_cache->setValueDeleter(uhash_deleteHashtable);
}
U_CDECL_END
U_NAMESPACE_BEGIN
UnicodeString&
LocaleUtility::canonicalLocaleString(const UnicodeString* id, UnicodeString& result)
{
if (id == NULL) {
result.setToBogus();
} else {
// Fix case only (no other changes) up to the first '@' or '.' or
// end of string, whichever comes first. In 3.0 I changed this to
// stop at first '@' or '.'. It used to run out to the end of
// string. My fix makes the tests pass but is probably
// structurally incorrect. See below. [alan 3.0]
// TODO: Doug, you might want to revise this...
result = *id;
int32_t i = 0;
int32_t end = result.indexOf(AT_SIGN_CHAR);
int32_t n = result.indexOf(PERIOD_CHAR);
if (n >= 0 && n < end) {
end = n;
}
if (end < 0) {
end = result.length();
}
n = result.indexOf(UNDERSCORE_CHAR);
if (n < 0) {
n = end;
}
for (; i < n; ++i) {
UChar c = result.charAt(i);
if (c >= 0x0041 && c <= 0x005a) {
c += 0x20;
result.setCharAt(i, c);
}
}
for (n = end; i < n; ++i) {
UChar c = result.charAt(i);
if (c >= 0x0061 && c <= 0x007a) {
c -= 0x20;
result.setCharAt(i, c);
}
}
}
return result;
#if 0
// This code does a proper full level 2 canonicalization of id.
// It's nasty to go from UChar to char to char to UChar -- but
// that's what you have to do to use the uloc_canonicalize
// function on UnicodeStrings.
// I ended up doing the alternate fix (see above) not for
// performance reasons, although performance will certainly be
// better, but because doing a full level 2 canonicalization
// causes some tests to fail. [alan 3.0]
// TODO: Doug, you might want to revisit this...
result.setToBogus();
if (id != 0) {
int32_t buflen = id->length() + 8; // space for NUL
char* buf = (char*) uprv_malloc(buflen);
char* canon = (buf == 0) ? 0 : (char*) uprv_malloc(buflen);
if (buf != 0 && canon != 0) {
U_ASSERT(id->extract(0, INT32_MAX, buf, buflen) < buflen);
UErrorCode ec = U_ZERO_ERROR;
uloc_canonicalize(buf, canon, buflen, &ec);
if (U_SUCCESS(ec)) {
result = UnicodeString(canon);
}
}
uprv_free(buf);
uprv_free(canon);
}
return result;
#endif
}
Locale&
LocaleUtility::initLocaleFromName(const UnicodeString& id, Locale& result)
{
enum { BUFLEN = 128 }; // larger than ever needed
if (id.isBogus() || id.length() >= BUFLEN) {
result.setToBogus();
} else {
/*
* We need to convert from a UnicodeString to char * in order to
* create a Locale.
*
* Problem: Locale ID strings may contain '@' which is a variant
* character and cannot be handled by invariant-character conversion.
*
* Hack: Since ICU code can handle locale IDs with multiple encodings
* of '@' (at least for EBCDIC; it's not known to be a problem for
* ASCII-based systems),
* we use regular invariant-character conversion for everything else
* and manually convert U+0040 into a compiler-char-constant '@'.
* While this compilation-time constant may not match the runtime
* encoding of '@', it should be one of the encodings which ICU
* recognizes.
*
* There should be only at most one '@' in a locale ID.
*/
char buffer[BUFLEN];
int32_t prev, i;
prev = 0;
for(;;) {
i = id.indexOf((UChar)0x40, prev);
if(i < 0) {
// no @ between prev and the rest of the string
id.extract(prev, INT32_MAX, buffer + prev, BUFLEN - prev, US_INV);
break; // done
} else {
// normal invariant-character conversion for text between @s
id.extract(prev, i - prev, buffer + prev, BUFLEN - prev, US_INV);
// manually "convert" U+0040 at id[i] into '@' at buffer[i]
buffer[i] = '@';
prev = i + 1;
}
}
result = Locale::createFromName(buffer);
}
return result;
}
UnicodeString&
LocaleUtility::initNameFromLocale(const Locale& locale, UnicodeString& result)
{
if (locale.isBogus()) {
result.setToBogus();
} else {
result.append(UnicodeString(locale.getName(), -1, US_INV));
}
return result;
}
const Hashtable*
LocaleUtility::getAvailableLocaleNames(const UnicodeString& bundleID)
{
// LocaleUtility_cache is a hash-of-hashes. The top-level keys
// are path strings ('bundleID') passed to
// ures_openAvailableLocales. The top-level values are
// second-level hashes. The second-level keys are result strings
// from ures_openAvailableLocales. The second-level values are
// garbage ((void*)1 or other random pointer).
UErrorCode status = U_ZERO_ERROR;
umtx_initOnce(LocaleUtilityInitOnce, locale_utility_init, status);
Hashtable *cache = LocaleUtility_cache;
if (cache == NULL) {
// Catastrophic failure.
return NULL;
}
Hashtable* htp;
umtx_lock(NULL);
htp = (Hashtable*) cache->get(bundleID);
umtx_unlock(NULL);
if (htp == NULL) {
htp = new Hashtable(status);
if (htp && U_SUCCESS(status)) {
CharString cbundleID;
cbundleID.appendInvariantChars(bundleID, status);
const char* path = cbundleID.isEmpty() ? NULL : cbundleID.data();
UEnumeration *uenum = ures_openAvailableLocales(path, &status);
for (;;) {
const UChar* id = uenum_unext(uenum, NULL, &status);
if (id == NULL) {
break;
}
htp->put(UnicodeString(id), (void*)htp, status);
}
uenum_close(uenum);
if (U_FAILURE(status)) {
delete htp;
return NULL;
}
umtx_lock(NULL);
Hashtable *t = static_cast<Hashtable *>(cache->get(bundleID));
if (t != NULL) {
// Another thread raced through this code, creating the cache entry first.
// Discard ours and return theirs.
umtx_unlock(NULL);
delete htp;
htp = t;
} else {
cache->put(bundleID, (void*)htp, status);
umtx_unlock(NULL);
}
}
}
return htp;
}
UBool
LocaleUtility::isFallbackOf(const UnicodeString& root, const UnicodeString& child)
{
return child.indexOf(root) == 0 &&
(child.length() == root.length() ||
child.charAt(root.length()) == UNDERSCORE_CHAR);
}
U_NAMESPACE_END
/* !UCONFIG_NO_SERVICE */
#endif

View File

@ -0,0 +1,37 @@
/**
*******************************************************************************
* Copyright (C) 2002-2005, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*
*******************************************************************************
*/
#ifndef LOCUTIL_H
#define LOCUTIL_H
#include "unicode/utypes.h"
#include "hash.h"
#if !UCONFIG_NO_SERVICE || !UCONFIG_NO_TRANSLITERATION
U_NAMESPACE_BEGIN
// temporary utility functions, till I know where to find them
// in header so tests can also access them
class U_COMMON_API LocaleUtility {
public:
static UnicodeString& canonicalLocaleString(const UnicodeString* id, UnicodeString& result);
static Locale& initLocaleFromName(const UnicodeString& id, Locale& result);
static UnicodeString& initNameFromLocale(const Locale& locale, UnicodeString& result);
static const Hashtable* getAvailableLocaleNames(const UnicodeString& bundleID);
static UBool isFallbackOf(const UnicodeString& root, const UnicodeString& child);
};
U_NAMESPACE_END
#endif
#endif

View File

@ -0,0 +1,63 @@
/*
*******************************************************************************
* Copyright (C) 2011, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* file name: messageimpl.h
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2011apr04
* created by: Markus W. Scherer
*/
#ifndef __MESSAGEIMPL_H__
#define __MESSAGEIMPL_H__
#include "unicode/utypes.h"
#if !UCONFIG_NO_FORMATTING
#include "unicode/messagepattern.h"
U_NAMESPACE_BEGIN
/**
* Helper functions for use of MessagePattern.
* In Java, these are package-private methods in MessagePattern itself.
* In C++, they are declared here and implemented in messagepattern.cpp.
*/
class U_COMMON_API MessageImpl {
public:
/**
* @return TRUE if getApostropheMode()==UMSGPAT_APOS_DOUBLE_REQUIRED
*/
static UBool jdkAposMode(const MessagePattern &msgPattern) {
return msgPattern.getApostropheMode()==UMSGPAT_APOS_DOUBLE_REQUIRED;
}
/**
* Appends the s[start, limit[ substring to sb, but with only half of the apostrophes
* according to JDK pattern behavior.
*/
static void appendReducedApostrophes(const UnicodeString &s, int32_t start, int32_t limit,
UnicodeString &sb);
/**
* Appends the sub-message to the result string.
* Omits SKIP_SYNTAX and appends whole arguments using appendReducedApostrophes().
*/
static UnicodeString &appendSubMessageWithoutSkipSyntax(const MessagePattern &msgPattern,
int32_t msgStart,
UnicodeString &result);
private:
MessageImpl(); // no constructor: all static methods
};
U_NAMESPACE_END
#endif // !UCONFIG_NO_FORMATTING
#endif // __MESSAGEIMPL_H__

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,23 @@
//{{NO_DEPENDENCIES}}
// Copyright (c) 2003-2010 International Business Machines
// Corporation and others. All Rights Reserved.
//
// Used by common.rc and other .rc files.
//Do not edit with Microsoft Developer Studio because it will modify this
//header the wrong way. This is here to prevent Visual Studio .NET from
//unnessarily building the resource files when it's not needed.
//
/*
These are defined before unicode/uversion.h in order to prevent
STLPort's broken stddef.h from being used when rc.exe parses this file.
*/
#define _STLP_OUTERMOST_HEADER_ID 0
#define _STLP_WINCE 1
#include "unicode/uversion.h"
#define ICU_WEBSITE "http://icu-project.org"
#define ICU_COMPANY "The ICU Project"
#define ICU_PRODUCT_PREFIX "ICU"
#define ICU_PRODUCT "International Components for Unicode"

View File

@ -0,0 +1,77 @@
/*
******************************************************************************
*
* Copyright (C) 1997-2013, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
*/
//----------------------------------------------------------------------------
// File: mutex.h
//
// Lightweight C++ wrapper for umtx_ C mutex functions
//
// Author: Alan Liu 1/31/97
// History:
// 06/04/97 helena Updated setImplementation as per feedback from 5/21 drop.
// 04/07/1999 srl refocused as a thin wrapper
//
//----------------------------------------------------------------------------
#ifndef MUTEX_H
#define MUTEX_H
#include "unicode/utypes.h"
#include "unicode/uobject.h"
#include "umutex.h"
U_NAMESPACE_BEGIN
//----------------------------------------------------------------------------
// Code within that accesses shared static or global data should
// should instantiate a Mutex object while doing so. You should make your own
// private mutex where possible.
// For example:
//
// UMutex myMutex;
//
// void Function(int arg1, int arg2)
// {
// static Object* foo; // Shared read-write object
// Mutex mutex(&myMutex); // or no args for the global lock
// foo->Method();
// // When 'mutex' goes out of scope and gets destroyed here, the lock is released
// }
//
// Note: Do NOT use the form 'Mutex mutex();' as that merely forward-declares a function
// returning a Mutex. This is a common mistake which silently slips through the
// compiler!!
//
class U_COMMON_API Mutex : public UMemory {
public:
inline Mutex(UMutex *mutex = NULL);
inline ~Mutex();
private:
UMutex *fMutex;
Mutex(const Mutex &other); // forbid copying of this class
Mutex &operator=(const Mutex &other); // forbid copying of this class
};
inline Mutex::Mutex(UMutex *mutex)
: fMutex(mutex)
{
umtx_lock(fMutex);
}
inline Mutex::~Mutex()
{
umtx_unlock(fMutex);
}
U_NAMESPACE_END
#endif //_MUTEX_
//eof

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,341 @@
/*
*******************************************************************************
* Copyright (C) 2014, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* loadednormalizer2impl.h
*
* created on: 2014sep07
* created by: Markus W. Scherer
*/
#ifndef __NORM2ALLMODES_H__
#define __NORM2ALLMODES_H__
#include "unicode/utypes.h"
#if !UCONFIG_NO_NORMALIZATION
#include "unicode/normalizer2.h"
#include "unicode/unistr.h"
#include "cpputils.h"
#include "normalizer2impl.h"
U_NAMESPACE_BEGIN
// Intermediate class:
// Has Normalizer2Impl and does boilerplate argument checking and setup.
class Normalizer2WithImpl : public Normalizer2 {
public:
Normalizer2WithImpl(const Normalizer2Impl &ni) : impl(ni) {}
virtual ~Normalizer2WithImpl();
// normalize
virtual UnicodeString &
normalize(const UnicodeString &src,
UnicodeString &dest,
UErrorCode &errorCode) const {
if(U_FAILURE(errorCode)) {
dest.setToBogus();
return dest;
}
const UChar *sArray=src.getBuffer();
if(&dest==&src || sArray==NULL) {
errorCode=U_ILLEGAL_ARGUMENT_ERROR;
dest.setToBogus();
return dest;
}
dest.remove();
ReorderingBuffer buffer(impl, dest);
if(buffer.init(src.length(), errorCode)) {
normalize(sArray, sArray+src.length(), buffer, errorCode);
}
return dest;
}
virtual void
normalize(const UChar *src, const UChar *limit,
ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
// normalize and append
virtual UnicodeString &
normalizeSecondAndAppend(UnicodeString &first,
const UnicodeString &second,
UErrorCode &errorCode) const {
return normalizeSecondAndAppend(first, second, TRUE, errorCode);
}
virtual UnicodeString &
append(UnicodeString &first,
const UnicodeString &second,
UErrorCode &errorCode) const {
return normalizeSecondAndAppend(first, second, FALSE, errorCode);
}
UnicodeString &
normalizeSecondAndAppend(UnicodeString &first,
const UnicodeString &second,
UBool doNormalize,
UErrorCode &errorCode) const {
uprv_checkCanGetBuffer(first, errorCode);
if(U_FAILURE(errorCode)) {
return first;
}
const UChar *secondArray=second.getBuffer();
if(&first==&second || secondArray==NULL) {
errorCode=U_ILLEGAL_ARGUMENT_ERROR;
return first;
}
int32_t firstLength=first.length();
UnicodeString safeMiddle;
{
ReorderingBuffer buffer(impl, first);
if(buffer.init(firstLength+second.length(), errorCode)) {
normalizeAndAppend(secondArray, secondArray+second.length(), doNormalize,
safeMiddle, buffer, errorCode);
}
} // The ReorderingBuffer destructor finalizes the first string.
if(U_FAILURE(errorCode)) {
// Restore the modified suffix of the first string.
first.replace(firstLength-safeMiddle.length(), 0x7fffffff, safeMiddle);
}
return first;
}
virtual void
normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
UnicodeString &safeMiddle,
ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
virtual UBool
getDecomposition(UChar32 c, UnicodeString &decomposition) const {
UChar buffer[4];
int32_t length;
const UChar *d=impl.getDecomposition(c, buffer, length);
if(d==NULL) {
return FALSE;
}
if(d==buffer) {
decomposition.setTo(buffer, length); // copy the string (Jamos from Hangul syllable c)
} else {
decomposition.setTo(FALSE, d, length); // read-only alias
}
return TRUE;
}
virtual UBool
getRawDecomposition(UChar32 c, UnicodeString &decomposition) const {
UChar buffer[30];
int32_t length;
const UChar *d=impl.getRawDecomposition(c, buffer, length);
if(d==NULL) {
return FALSE;
}
if(d==buffer) {
decomposition.setTo(buffer, length); // copy the string (algorithmic decomposition)
} else {
decomposition.setTo(FALSE, d, length); // read-only alias
}
return TRUE;
}
virtual UChar32
composePair(UChar32 a, UChar32 b) const {
return impl.composePair(a, b);
}
virtual uint8_t
getCombiningClass(UChar32 c) const {
return impl.getCC(impl.getNorm16(c));
}
// quick checks
virtual UBool
isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
if(U_FAILURE(errorCode)) {
return FALSE;
}
const UChar *sArray=s.getBuffer();
if(sArray==NULL) {
errorCode=U_ILLEGAL_ARGUMENT_ERROR;
return FALSE;
}
const UChar *sLimit=sArray+s.length();
return sLimit==spanQuickCheckYes(sArray, sLimit, errorCode);
}
virtual UNormalizationCheckResult
quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
return Normalizer2WithImpl::isNormalized(s, errorCode) ? UNORM_YES : UNORM_NO;
}
virtual int32_t
spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const {
if(U_FAILURE(errorCode)) {
return 0;
}
const UChar *sArray=s.getBuffer();
if(sArray==NULL) {
errorCode=U_ILLEGAL_ARGUMENT_ERROR;
return 0;
}
return (int32_t)(spanQuickCheckYes(sArray, sArray+s.length(), errorCode)-sArray);
}
virtual const UChar *
spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const = 0;
virtual UNormalizationCheckResult getQuickCheck(UChar32) const {
return UNORM_YES;
}
const Normalizer2Impl &impl;
};
class DecomposeNormalizer2 : public Normalizer2WithImpl {
public:
DecomposeNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
virtual ~DecomposeNormalizer2();
private:
virtual void
normalize(const UChar *src, const UChar *limit,
ReorderingBuffer &buffer, UErrorCode &errorCode) const {
impl.decompose(src, limit, &buffer, errorCode);
}
using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function.
virtual void
normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
UnicodeString &safeMiddle,
ReorderingBuffer &buffer, UErrorCode &errorCode) const {
impl.decomposeAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
}
virtual const UChar *
spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
return impl.decompose(src, limit, NULL, errorCode);
}
using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function.
virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO;
}
virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasDecompBoundary(c, TRUE); }
virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasDecompBoundary(c, FALSE); }
virtual UBool isInert(UChar32 c) const { return impl.isDecompInert(c); }
};
class ComposeNormalizer2 : public Normalizer2WithImpl {
public:
ComposeNormalizer2(const Normalizer2Impl &ni, UBool fcc) :
Normalizer2WithImpl(ni), onlyContiguous(fcc) {}
virtual ~ComposeNormalizer2();
private:
virtual void
normalize(const UChar *src, const UChar *limit,
ReorderingBuffer &buffer, UErrorCode &errorCode) const {
impl.compose(src, limit, onlyContiguous, TRUE, buffer, errorCode);
}
using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function.
virtual void
normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
UnicodeString &safeMiddle,
ReorderingBuffer &buffer, UErrorCode &errorCode) const {
impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, safeMiddle, buffer, errorCode);
}
virtual UBool
isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
if(U_FAILURE(errorCode)) {
return FALSE;
}
const UChar *sArray=s.getBuffer();
if(sArray==NULL) {
errorCode=U_ILLEGAL_ARGUMENT_ERROR;
return FALSE;
}
UnicodeString temp;
ReorderingBuffer buffer(impl, temp);
if(!buffer.init(5, errorCode)) { // small destCapacity for substring normalization
return FALSE;
}
return impl.compose(sArray, sArray+s.length(), onlyContiguous, FALSE, buffer, errorCode);
}
virtual UNormalizationCheckResult
quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
if(U_FAILURE(errorCode)) {
return UNORM_MAYBE;
}
const UChar *sArray=s.getBuffer();
if(sArray==NULL) {
errorCode=U_ILLEGAL_ARGUMENT_ERROR;
return UNORM_MAYBE;
}
UNormalizationCheckResult qcResult=UNORM_YES;
impl.composeQuickCheck(sArray, sArray+s.length(), onlyContiguous, &qcResult);
return qcResult;
}
virtual const UChar *
spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &) const {
return impl.composeQuickCheck(src, limit, onlyContiguous, NULL);
}
using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function.
virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
return impl.getCompQuickCheck(impl.getNorm16(c));
}
virtual UBool hasBoundaryBefore(UChar32 c) const {
return impl.hasCompBoundaryBefore(c);
}
virtual UBool hasBoundaryAfter(UChar32 c) const {
return impl.hasCompBoundaryAfter(c, onlyContiguous, FALSE);
}
virtual UBool isInert(UChar32 c) const {
return impl.hasCompBoundaryAfter(c, onlyContiguous, TRUE);
}
const UBool onlyContiguous;
};
class FCDNormalizer2 : public Normalizer2WithImpl {
public:
FCDNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
virtual ~FCDNormalizer2();
private:
virtual void
normalize(const UChar *src, const UChar *limit,
ReorderingBuffer &buffer, UErrorCode &errorCode) const {
impl.makeFCD(src, limit, &buffer, errorCode);
}
using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function.
virtual void
normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
UnicodeString &safeMiddle,
ReorderingBuffer &buffer, UErrorCode &errorCode) const {
impl.makeFCDAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
}
virtual const UChar *
spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
return impl.makeFCD(src, limit, NULL, errorCode);
}
using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function.
virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasFCDBoundaryBefore(c); }
virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasFCDBoundaryAfter(c); }
virtual UBool isInert(UChar32 c) const { return impl.isFCDInert(c); }
};
struct Norm2AllModes : public UMemory {
Norm2AllModes(Normalizer2Impl *i)
: impl(i), comp(*i, FALSE), decomp(*i), fcd(*i), fcc(*i, TRUE) {}
~Norm2AllModes();
static Norm2AllModes *createInstance(Normalizer2Impl *impl, UErrorCode &errorCode);
static Norm2AllModes *createNFCInstance(UErrorCode &errorCode);
static Norm2AllModes *createInstance(const char *packageName,
const char *name,
UErrorCode &errorCode);
static const Norm2AllModes *getNFCInstance(UErrorCode &errorCode);
static const Norm2AllModes *getNFKCInstance(UErrorCode &errorCode);
static const Norm2AllModes *getNFKC_CFInstance(UErrorCode &errorCode);
Normalizer2Impl *impl;
ComposeNormalizer2 comp;
DecomposeNormalizer2 decomp;
FCDNormalizer2 fcd;
ComposeNormalizer2 fcc;
};
U_NAMESPACE_END
#endif // !UCONFIG_NO_NORMALIZATION
#endif // __NORM2ALLMODES_H__

Some files were not shown because too many files have changed in this diff Show More