mirror of
https://github.com/ONLYOFFICE/core.git
synced 2026-04-07 13:55:33 +08:00
Compare commits
19 Commits
v5.1.0.115
...
v5.1.2.8
| Author | SHA1 | Date | |
|---|---|---|---|
| 09407b08ba | |||
| 83bc1b18b6 | |||
| 566eeb06ae | |||
| f5c359650f | |||
| 3c93563027 | |||
| d5b80e2fa3 | |||
| 2bc2039c01 | |||
| a0fdb1cbcc | |||
| 664007983d | |||
| a15aee3244 | |||
| bf8bea5d07 | |||
| d9bd1cdb32 | |||
| e1a8bd3b3d | |||
| a8f7a1f044 | |||
| 5c43b5743d | |||
| 5ae24e7e32 | |||
| cc69c8c1b4 | |||
| d0b2acb6b3 | |||
| 44491bbbb4 |
@ -2026,11 +2026,19 @@ namespace DocFileFormat
|
||||
nElemSize = 4;
|
||||
bTruncated = true;
|
||||
}
|
||||
if (nElemSize == 0)
|
||||
{
|
||||
nElemSize = 2; //enredobar.doc
|
||||
}
|
||||
long dwSize = nElems * nElemSize;
|
||||
|
||||
if (opGuides->op - 6 != (dwSize))
|
||||
{
|
||||
bool b = false;
|
||||
if (nElems > 0x7fff)
|
||||
{
|
||||
dwSize = (opGuides->op - 6);
|
||||
}
|
||||
}
|
||||
int count = dwSize / nElemSize; //1x (int or short)
|
||||
for (int i = 0; i < count; ++i)
|
||||
@ -2070,12 +2078,19 @@ namespace DocFileFormat
|
||||
nElemSize = 4;
|
||||
bTruncated = true;
|
||||
}
|
||||
|
||||
if (nElemSize == 0)
|
||||
{
|
||||
nElemSize = 2; //enredobar.doc
|
||||
}
|
||||
long dwSize = nElems * nElemSize;
|
||||
|
||||
if (opLocs->op - 6 != (dwSize))
|
||||
{
|
||||
bool b = false;
|
||||
if (nElems > 0x7fff)
|
||||
{
|
||||
dwSize = (opLocs->op - 6);
|
||||
}
|
||||
}
|
||||
int count = dwSize / nElemSize; //2x (int or short)
|
||||
|
||||
|
||||
@ -3013,6 +3013,7 @@ namespace BinDocxRW
|
||||
OOX::Logic::CSectionProperty* pSectPr;
|
||||
OOX::Logic::CBackground * pBackground;
|
||||
OOX::CDocument* poDocument;
|
||||
OOX::JsaProject* pJsaProject;
|
||||
|
||||
bool m_bWriteSectPr;//Записывать ли свойства верхнего уровня в данном экземпляре BinaryOtherTableWriter
|
||||
//---------------------------------
|
||||
@ -3022,6 +3023,7 @@ namespace BinDocxRW
|
||||
pBackground = NULL;
|
||||
pSectPr = NULL;
|
||||
poDocument = NULL;
|
||||
pJsaProject = NULL;
|
||||
m_bWriteSectPr = false;
|
||||
}
|
||||
void WriteVbaProject(OOX::VbaProject& oVbaProject)
|
||||
@ -3120,6 +3122,19 @@ namespace BinDocxRW
|
||||
m_oBcw.WriteItemEnd(nCurPos);
|
||||
}
|
||||
}
|
||||
//Write JsaProject
|
||||
if (NULL != pJsaProject)
|
||||
{
|
||||
BYTE* pData = NULL;
|
||||
DWORD nBytesCount;
|
||||
if(NSFile::CFileBinary::ReadAllBytes(pJsaProject->filename().GetPath(), &pData, nBytesCount))
|
||||
{
|
||||
nCurPos = m_oBcw.WriteItemStart(c_oSerParType::JsaProject);
|
||||
m_oBcw.m_oStream.WriteBYTEArray(pData, nBytesCount);
|
||||
m_oBcw.WriteItemEnd(nCurPos);
|
||||
RELEASEARRAYOBJECTS(pData);
|
||||
}
|
||||
}
|
||||
}
|
||||
void WriteBackground (OOX::Logic::CBackground* pBackground)
|
||||
{
|
||||
@ -7964,6 +7979,7 @@ namespace BinDocxRW
|
||||
oBinaryDocumentTableWriter.pSectPr = pFirstSectPr;
|
||||
oBinaryDocumentTableWriter.pBackground = oDocx.m_pDocument->m_oBackground.GetPointer();
|
||||
oBinaryDocumentTableWriter.poDocument = oDocx.m_pDocument;
|
||||
oBinaryDocumentTableWriter.pJsaProject = oDocx.m_pJsaProject;
|
||||
|
||||
oBinaryDocumentTableWriter.m_bWriteSectPr = true;
|
||||
|
||||
@ -7974,18 +7990,6 @@ namespace BinDocxRW
|
||||
oBinaryDocumentTableWriter.WriteVbaProject(*oDocx.m_pVbaProject);
|
||||
this->WriteTableEnd(nCurPos);
|
||||
}
|
||||
//Write JsaProject
|
||||
if (NULL != oDocx.m_pJsaProject)
|
||||
{
|
||||
BYTE* pData = NULL;
|
||||
DWORD nBytesCount;
|
||||
if(NSFile::CFileBinary::ReadAllBytes(oDocx.m_pJsaProject->filename().GetPath(), &pData, nBytesCount))
|
||||
{
|
||||
nCurPos = m_oBcw.WriteItemStart(c_oSerParType::JsaProject);
|
||||
m_oBcw.m_oStream.WriteBYTEArray(pData, nBytesCount);
|
||||
m_oBcw.WriteItemEnd(nCurPos);
|
||||
}
|
||||
}
|
||||
|
||||
// Write content
|
||||
nCurPos = this->WriteTableStart(BinDocxRW::c_oSerTableTypes::Document);
|
||||
@ -7993,7 +7997,7 @@ namespace BinDocxRW
|
||||
this->WriteTableEnd(nCurPos);
|
||||
|
||||
nCurPos = this->WriteTableStart(BinDocxRW::c_oSerTableTypes::HdrFtr);
|
||||
oBinaryHeaderFooterTableWriter.Write();
|
||||
oBinaryHeaderFooterTableWriter.Write();
|
||||
this->WriteTableEnd(nCurPos);
|
||||
|
||||
if(NULL != m_oParamsWriter.m_poTheme)
|
||||
|
||||
@ -60,7 +60,7 @@ void _mediaitems::add_or_find(const std::wstring & oox_ref, Type type, std::wst
|
||||
{
|
||||
std::wstring output_sub_path;
|
||||
std::wstring output_fileName;
|
||||
int number=0;
|
||||
size_t number = 0;
|
||||
|
||||
if (type == typeImage)
|
||||
{
|
||||
|
||||
@ -1101,7 +1101,7 @@ void odf_drawing_context::end_frame()
|
||||
/////////////////////
|
||||
void odf_drawing_context::start_element(office_element_ptr elm, office_element_ptr style_elm)
|
||||
{
|
||||
int level = (int)impl_->current_level_.size();
|
||||
size_t level = (int)impl_->current_level_.size();
|
||||
|
||||
if (impl_->current_level_.size() > 0 && elm)
|
||||
impl_->current_level_.back()->add_child_element(elm);
|
||||
|
||||
@ -490,7 +490,7 @@ void odf_lists_styles_context::set_numeric_format(std::wstring val)
|
||||
}
|
||||
if (r1 < 0) return;//??
|
||||
|
||||
r1 = s.find(L"%", 1);
|
||||
r1 = (int)s.find(L"%", 1);
|
||||
if (r1 >0)
|
||||
{
|
||||
int level = 0;
|
||||
@ -498,7 +498,7 @@ void odf_lists_styles_context::set_numeric_format(std::wstring val)
|
||||
int r2=0;
|
||||
while (r2 < s.length())
|
||||
{
|
||||
r2 = s.find(L"%", r2);
|
||||
r2 = (int)s.find(L"%", r2);
|
||||
if (r2 >=0) level++;
|
||||
else break;
|
||||
r2++;
|
||||
|
||||
@ -545,9 +545,9 @@ void odf_number_styles_context::create_currency_style(number_format_state & stat
|
||||
{
|
||||
create_element(L"number", L"currency-style", root_elm, odf_context_);
|
||||
{
|
||||
int res1 = state.format_code[0].rfind(L"]");
|
||||
int res2 = state.format_code[0].rfind(L"#");
|
||||
int res3 = state.format_code[0].rfind(L"0");
|
||||
int res1 = (int)state.format_code[0].rfind(L"]");
|
||||
int res2 = (int)state.format_code[0].rfind(L"#");
|
||||
int res3 = (int)state.format_code[0].rfind(L"0");
|
||||
|
||||
office_element_ptr elm_symbol;
|
||||
create_element(L"number", L"currency-symbol", elm_symbol, odf_context_);
|
||||
|
||||
@ -135,7 +135,7 @@ void odf_master_state::add_header(office_element_ptr & elm)
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
odf_layout_state::odf_layout_state(office_element_ptr & layout_elm )
|
||||
{
|
||||
int level =0;
|
||||
size_t level = 0;
|
||||
odf_element_state state = {layout_elm,L"",office_element_ptr(),level};
|
||||
|
||||
elements_.push_back(state);
|
||||
|
||||
@ -400,11 +400,11 @@ bool ods_table_state::is_cell_comment()
|
||||
|
||||
int ods_table_state::is_cell_hyperlink(int col, int row)
|
||||
{
|
||||
for (size_t i=0; i < hyperlinks_.size();i++)
|
||||
for (size_t i = 0; i < hyperlinks_.size(); i++)
|
||||
{
|
||||
if (hyperlinks_[i].col == col && hyperlinks_[i].row == row)
|
||||
{
|
||||
return i;
|
||||
return (int)i;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
@ -415,7 +415,7 @@ int ods_table_state::is_cell_comment(int col, int row, short repeate_col)
|
||||
{
|
||||
if ((comments_[i].col < col + repeate_col && comments_[i].col >= col) && comments_[i].row == row)
|
||||
{
|
||||
return i;
|
||||
return (int)i;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
|
||||
@ -200,7 +200,7 @@ void odt_conversion_context::add_text_content(const std::wstring & text)
|
||||
{
|
||||
if (drop_cap_state_.enabled)
|
||||
{
|
||||
int count = text.length();
|
||||
int count = (int)text.length();
|
||||
drop_cap_state_.characters += count;
|
||||
|
||||
style_text_properties * props = text_context()->get_text_properties();
|
||||
|
||||
@ -66,10 +66,10 @@ public:
|
||||
|
||||
virtual void serialize(std::wostream & _Wostream);
|
||||
|
||||
text_text(const std::wstring & Text) : text_(Text) {};
|
||||
text_text() {};
|
||||
text_text(const std::wstring & Text) : text_(Text) {}
|
||||
text_text() {}
|
||||
|
||||
std::wstring & attr_text() { return text_; };
|
||||
std::wstring & attr_text() { return text_; }
|
||||
|
||||
virtual void add_text(const std::wstring & Text);
|
||||
|
||||
@ -263,7 +263,7 @@ public:
|
||||
|
||||
public:
|
||||
text_reference_mark_start() {}
|
||||
text_reference_mark_start(const std::wstring & Name) : text_name_(Name){};
|
||||
text_reference_mark_start(const std::wstring & Name) : text_name_(Name){}
|
||||
|
||||
virtual void create_child_element(const std::wstring & Ns, const std::wstring & Name){}
|
||||
virtual void add_child_element( const office_element_ptr & child_element){}
|
||||
|
||||
@ -175,6 +175,7 @@ SOURCES += \
|
||||
../Reader/SlidePersist.cpp \
|
||||
../Records/Animations/AnimationTypes.cpp \
|
||||
../Records/Drawing/ArtBlip.cpp \
|
||||
../Records/Drawing/ShapeContainer.cpp \
|
||||
../PPTXWriter/Converter.cpp \
|
||||
../PPTXWriter/ShapeWriter.cpp
|
||||
}
|
||||
|
||||
@ -37,6 +37,7 @@
|
||||
#include "../Reader/SlidePersist.cpp"
|
||||
#include "../Records/Animations/AnimationTypes.cpp"
|
||||
#include "../Records/Drawing/ArtBlip.cpp"
|
||||
#include "../Records/Drawing/ShapeContainer.cpp"
|
||||
|
||||
#include "../PPTXWriter/Converter.cpp"
|
||||
#include "../PPTXWriter/ShapeWriter.cpp"
|
||||
|
||||
@ -285,115 +285,128 @@ void NSPresentationEditor::CPPTXWriter::WriteApp(CFile& oFile)
|
||||
{
|
||||
CP_XML_ATTR(L"xmlns", L"http://schemas.openxmlformats.org/officeDocument/2006/extended-properties");
|
||||
CP_XML_ATTR(L"xmlns:vt", L"http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypess");
|
||||
}
|
||||
CP_XML_NODE(L"Application")
|
||||
{
|
||||
CP_XML_STREAM() << 0;
|
||||
}
|
||||
#if defined(INTVER)
|
||||
CP_XML_NODE(L"AppVersion")
|
||||
{
|
||||
|
||||
CP_XML_NODE(L"TotalTime")
|
||||
{
|
||||
CP_XML_STREAM() << 0;
|
||||
}
|
||||
CP_XML_NODE(L"Words")
|
||||
{
|
||||
CP_XML_STREAM() << 0;
|
||||
}
|
||||
std::wstring sApplication = L"ONLYOFFICE";
|
||||
#if defined(INTVER)
|
||||
std::string s = VALUE2STR(INTVER);
|
||||
CP_XML_STREAM() << std::wstring(s.begin(), s.end());
|
||||
}
|
||||
#endif
|
||||
CP_XML_NODE(L"TotalTime")
|
||||
{
|
||||
CP_XML_STREAM() << 0;
|
||||
}
|
||||
CP_XML_NODE(L"Words")
|
||||
{
|
||||
CP_XML_STREAM() << 0;
|
||||
}
|
||||
CP_XML_NODE(L"PresentationFormat")
|
||||
{
|
||||
CP_XML_STREAM() << L"On-screen Show (4:3)";
|
||||
}
|
||||
CP_XML_NODE(L"Paragraphs")
|
||||
{
|
||||
CP_XML_STREAM() << 0;
|
||||
}
|
||||
CP_XML_NODE(L"Slides")
|
||||
{
|
||||
CP_XML_STREAM() << m_pDocument->m_arSlides.size();
|
||||
}
|
||||
CP_XML_NODE(L"Notes")
|
||||
{
|
||||
CP_XML_STREAM() << m_pDocument->m_arNotes.size();
|
||||
}
|
||||
CP_XML_NODE(L"HiddenSlides")
|
||||
{
|
||||
CP_XML_STREAM() << 0;
|
||||
}
|
||||
CP_XML_NODE(L"MMClips")
|
||||
{
|
||||
CP_XML_STREAM() << 2;
|
||||
}
|
||||
CP_XML_NODE(L"ScaleCrop")
|
||||
{
|
||||
CP_XML_STREAM() << L"false";
|
||||
}
|
||||
CP_XML_NODE(L"HeadingPairs")
|
||||
{
|
||||
CP_XML_NODE(L"vt:vector")
|
||||
sApplication += L"/" + std::wstring(s.begin(), s.end());
|
||||
#endif
|
||||
CP_XML_NODE(L"Application")
|
||||
{
|
||||
CP_XML_ATTR(L"size", 4);
|
||||
CP_XML_ATTR(L"baseType", L"variant");
|
||||
|
||||
CP_XML_NODE(L"vt:variant")
|
||||
{
|
||||
CP_XML_ATTR(L"vt:lpstr", L"Theme");
|
||||
}
|
||||
CP_XML_NODE(L"vt:variant")
|
||||
{
|
||||
CP_XML_ATTR(L"vt:i4", m_pDocument->m_arThemes.size());
|
||||
}
|
||||
CP_XML_NODE(L"vt:variant")
|
||||
{
|
||||
CP_XML_ATTR(L"vt:lpstr", L"Slide Titles");
|
||||
}
|
||||
CP_XML_NODE(L"vt:variant")
|
||||
{
|
||||
CP_XML_ATTR(L"vt:i4", m_pDocument->m_arSlides.size());
|
||||
}
|
||||
CP_XML_STREAM() << sApplication;
|
||||
}
|
||||
}
|
||||
CP_XML_NODE(L"TitlesOfParts")
|
||||
{
|
||||
CP_XML_NODE(L"vt:vector")
|
||||
//CP_XML_NODE(L"AppVersion")
|
||||
//{
|
||||
// CP_XML_STREAM() << L"1.0";
|
||||
//}
|
||||
CP_XML_NODE(L"Paragraphs")
|
||||
{
|
||||
CP_XML_ATTR(L"size", m_pDocument->m_arSlides.size() + m_pDocument->m_arThemes.size());
|
||||
CP_XML_ATTR(L"baseType", L"lpstr");
|
||||
|
||||
for (size_t i = 1; i <= m_pDocument->m_arThemes.size(); ++i)
|
||||
{
|
||||
CP_XML_NODE(L"vt:lpstr")
|
||||
{
|
||||
CP_XML_STREAM() << L"Theme " << i;
|
||||
}
|
||||
}
|
||||
for (size_t i = 1; i <= m_pDocument->m_arSlides.size(); ++i)
|
||||
{
|
||||
CP_XML_NODE(L"vt:lpstr")
|
||||
{
|
||||
CP_XML_STREAM() << L"Slide " << i;
|
||||
}
|
||||
}
|
||||
CP_XML_STREAM() << 0;
|
||||
}
|
||||
}
|
||||
//CP_XML_NODE(L"PresentationFormat")
|
||||
//{
|
||||
// CP_XML_STREAM() << L"On-screen Show (4:3)";
|
||||
//}
|
||||
//CP_XML_NODE(L"Slides")
|
||||
//{
|
||||
// CP_XML_STREAM() << m_pDocument->m_arSlides.size();
|
||||
//}
|
||||
//CP_XML_NODE(L"Notes")
|
||||
//{
|
||||
// CP_XML_STREAM() << m_pDocument->m_arNotes.size();
|
||||
//}
|
||||
//CP_XML_NODE(L"HiddenSlides")
|
||||
//{
|
||||
// CP_XML_STREAM() << 0;
|
||||
//}
|
||||
//CP_XML_NODE(L"MMClips")
|
||||
//{
|
||||
// CP_XML_STREAM() << 0;
|
||||
//}
|
||||
//CP_XML_NODE(L"ScaleCrop")
|
||||
//{
|
||||
// CP_XML_STREAM() << L"false";
|
||||
//}
|
||||
//CP_XML_NODE(L"HeadingPairs")
|
||||
//{
|
||||
// CP_XML_NODE(L"vt:vector")
|
||||
// {
|
||||
// CP_XML_ATTR(L"size", 4);
|
||||
// CP_XML_ATTR(L"baseType", L"variant");
|
||||
//
|
||||
// CP_XML_NODE(L"vt:variant")
|
||||
// {
|
||||
// CP_XML_NODE(L"vt:lpstr")
|
||||
// {
|
||||
// CP_XML_STREAM() << L"Theme";
|
||||
// }
|
||||
// }
|
||||
// CP_XML_NODE(L"vt:variant")
|
||||
// {
|
||||
// CP_XML_NODE(L"vt:i4")
|
||||
// {
|
||||
// CP_XML_STREAM() << m_pDocument->m_arThemes.size();
|
||||
// }
|
||||
// }
|
||||
// CP_XML_NODE(L"vt:variant")
|
||||
// {
|
||||
// CP_XML_NODE(L"vt:lpstr")
|
||||
// {
|
||||
// CP_XML_STREAM() << L"Slide Titles";
|
||||
// }
|
||||
// }
|
||||
// CP_XML_NODE(L"vt:variant")
|
||||
// {
|
||||
// CP_XML_NODE(L"vt:i4")
|
||||
// CP_XML_STREAM() << m_pDocument->m_arSlides.size();
|
||||
// }
|
||||
// }
|
||||
//}
|
||||
//CP_XML_NODE(L"TitlesOfParts")
|
||||
//{
|
||||
// CP_XML_NODE(L"vt:vector")
|
||||
// {
|
||||
// CP_XML_ATTR(L"size", m_pDocument->m_arSlides.size() + m_pDocument->m_arThemes.size());
|
||||
// CP_XML_ATTR(L"baseType", L"lpstr");
|
||||
|
||||
CP_XML_NODE(L"Company");
|
||||
CP_XML_NODE(L"LinksUpToDate")
|
||||
{
|
||||
CP_XML_STREAM() << L"false";
|
||||
}
|
||||
CP_XML_NODE(L"SharedDoc")
|
||||
{
|
||||
CP_XML_STREAM() << L"false";
|
||||
}
|
||||
CP_XML_NODE(L"HyperlinksChanged")
|
||||
{
|
||||
CP_XML_STREAM() << L"false";
|
||||
// for (size_t i = 1; i <= m_pDocument->m_arThemes.size(); ++i)
|
||||
// {
|
||||
// CP_XML_NODE(L"vt:lpstr")
|
||||
// {
|
||||
// CP_XML_STREAM() << L"Theme " << i;
|
||||
// }
|
||||
// }
|
||||
// for (size_t i = 1; i <= m_pDocument->m_arSlides.size(); ++i)
|
||||
// {
|
||||
// CP_XML_NODE(L"vt:lpstr")
|
||||
// {
|
||||
// CP_XML_STREAM() << L"Slide " << i;
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
//}
|
||||
|
||||
////CP_XML_NODE(L"Company");
|
||||
//CP_XML_NODE(L"LinksUpToDate")
|
||||
//{
|
||||
// CP_XML_STREAM() << L"false";
|
||||
//}
|
||||
//CP_XML_NODE(L"SharedDoc")
|
||||
//{
|
||||
// CP_XML_STREAM() << L"false";
|
||||
//}
|
||||
//CP_XML_NODE(L"HyperlinksChanged")
|
||||
//{
|
||||
// CP_XML_STREAM() << L"false";
|
||||
//}
|
||||
}
|
||||
}
|
||||
oFile.WriteStringUTF8(L"<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>");
|
||||
@ -577,14 +590,14 @@ void NSPresentationEditor::CPPTXWriter::WriteThemes()
|
||||
|
||||
for (size_t i = 0; i < m_pDocument->m_arThemes.size(); i++)
|
||||
{
|
||||
WriteTheme(&m_pDocument->m_arThemes[i], nIndexTheme, nStartLayout, 1);
|
||||
WriteTheme(&m_pDocument->m_arThemes[i], nIndexTheme, nStartLayout);
|
||||
}
|
||||
|
||||
WriteTheme(m_pDocument->m_pNotesMaster, nIndexTheme, nStartLayout, 2);
|
||||
WriteTheme(m_pDocument->m_pHandoutMaster, nIndexTheme, nStartLayout, 3);
|
||||
WriteTheme(m_pDocument->m_pNotesMaster, nIndexTheme, nStartLayout);
|
||||
WriteTheme(m_pDocument->m_pHandoutMaster, nIndexTheme, nStartLayout);
|
||||
}
|
||||
|
||||
void NSPresentationEditor::CPPTXWriter::WriteTheme(CTheme* pTheme, int & nIndexTheme, int & nStartLayout, int Type)
|
||||
void NSPresentationEditor::CPPTXWriter::WriteTheme(CTheme* pTheme, int & nIndexTheme, int & nStartLayout)
|
||||
{
|
||||
if (!pTheme) return;
|
||||
|
||||
@ -665,15 +678,15 @@ void NSPresentationEditor::CPPTXWriter::WriteTheme(CTheme* pTheme, int & nIndexT
|
||||
CStringWriter oWriter;
|
||||
oWriter.WriteString(L"<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\" ?>");
|
||||
|
||||
if (Type == 1)
|
||||
if (pTheme->m_eType == typeMaster)
|
||||
{
|
||||
oWriter.WriteString(L"<p:sldMaster xmlns:a=\"http://schemas.openxmlformats.org/drawingml/2006/main\" xmlns:r=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships\" xmlns:p=\"http://schemas.openxmlformats.org/presentationml/2006/main\">");
|
||||
}
|
||||
else if (Type == 2)
|
||||
else if (pTheme->m_eType == typeNotesMaster)
|
||||
{
|
||||
oWriter.WriteString(L"<p:notesMaster xmlns:a=\"http://schemas.openxmlformats.org/drawingml/2006/main\" xmlns:r=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships\" xmlns:p=\"http://schemas.openxmlformats.org/presentationml/2006/main\">");
|
||||
}
|
||||
else if (Type == 3)
|
||||
else if (pTheme->m_eType == typeHandoutMaster)
|
||||
{
|
||||
oWriter.WriteString(L"<p:handoutMaster xmlns:a=\"http://schemas.openxmlformats.org/drawingml/2006/main\" xmlns:r=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships\" xmlns:p=\"http://schemas.openxmlformats.org/presentationml/2006/main\">");
|
||||
}
|
||||
@ -688,12 +701,22 @@ void NSPresentationEditor::CPPTXWriter::WriteTheme(CTheme* pTheme, int & nIndexT
|
||||
|
||||
for (size_t nEl = 0; nEl < pTheme->m_arElements.size(); ++nEl)
|
||||
{
|
||||
if (!pTheme->m_arElements[nEl]) continue;
|
||||
|
||||
if (isBodyPlaceholder(pTheme->m_arElements[nEl]->m_lPlaceholderType))
|
||||
pTheme->m_arElements[nEl]->m_lPlaceholderType =100; //body тип прописывать !!
|
||||
pTheme->m_arElements[nEl]->m_lPlaceholderType = 100; //body тип прописывать !!
|
||||
|
||||
if (pTheme->m_arElements[nEl]->m_bBoundsEnabled == false)
|
||||
continue;
|
||||
|
||||
if (pTheme->m_eType == typeNotesMaster ||
|
||||
pTheme->m_eType == typeHandoutMaster)
|
||||
{
|
||||
pTheme->m_arElements[nEl]->m_lPlaceholderID = -1;
|
||||
|
||||
if (pTheme->m_eType == typeHandoutMaster)
|
||||
pTheme->m_arElements[nEl]->m_lPlaceholderSizePreset = -1;
|
||||
}
|
||||
WriteElement(oWriter, oRels, pTheme->m_arElements[nEl]);
|
||||
}
|
||||
|
||||
@ -702,7 +725,7 @@ void NSPresentationEditor::CPPTXWriter::WriteTheme(CTheme* pTheme, int & nIndexT
|
||||
std::wstring strOverrideColorScheme = _T("<p:clrMap bg1=\"lt1\" tx1=\"dk1\" bg2=\"lt2\" tx2=\"dk2\" accent1=\"accent1\" accent2=\"accent2\" accent3=\"accent3\" accent4=\"accent4\" accent5=\"accent5\" accent6=\"accent6\" hlink=\"hlink\" folHlink=\"folHlink\"/>");
|
||||
oWriter.WriteString(strOverrideColorScheme);
|
||||
|
||||
if (Type == 1)
|
||||
if (pTheme->m_eType == typeMaster)
|
||||
{
|
||||
oWriter.WriteString(std::wstring(L"<p:sldLayoutIdLst>"));
|
||||
|
||||
@ -726,7 +749,7 @@ void NSPresentationEditor::CPPTXWriter::WriteTheme(CTheme* pTheme, int & nIndexT
|
||||
if (!pTheme->m_bHasFooter) oWriter.WriteString(std::wstring(L" ftr=\"0\""));
|
||||
oWriter.WriteString(std::wstring(L"/>"));
|
||||
}
|
||||
if (Type == 1)
|
||||
if (pTheme->m_eType == typeMaster)
|
||||
{
|
||||
oWriter.WriteString(std::wstring(L"<p:txStyles>"));
|
||||
|
||||
@ -744,7 +767,7 @@ void NSPresentationEditor::CPPTXWriter::WriteTheme(CTheme* pTheme, int & nIndexT
|
||||
|
||||
oWriter.WriteString(std::wstring(L"</p:txStyles>"));
|
||||
}
|
||||
else if (Type == 2)
|
||||
else if (pTheme->m_eType == typeNotesMaster)
|
||||
{
|
||||
oWriter.WriteString(std::wstring(L"<p:notesStyle>"));
|
||||
CStylesWriter::ConvertStyles(pTheme->m_pStyles[1], pTheme->m_oInfo, oWriter, 9);
|
||||
@ -753,7 +776,8 @@ void NSPresentationEditor::CPPTXWriter::WriteTheme(CTheme* pTheme, int & nIndexT
|
||||
|
||||
std::wstring strSlideMasterFile;
|
||||
std::wstring strSlideMasterRelsFile;
|
||||
if (Type == 1)
|
||||
|
||||
if (pTheme->m_eType == typeMaster)
|
||||
{
|
||||
oWriter.WriteString(std::wstring(L"</p:sldMaster>"));
|
||||
|
||||
@ -763,7 +787,7 @@ void NSPresentationEditor::CPPTXWriter::WriteTheme(CTheme* pTheme, int & nIndexT
|
||||
strSlideMasterRelsFile = L"slideMaster" + std::to_wstring(nIndexTheme + 1) + L".xml.rels";
|
||||
strSlideMasterRelsFile = strPptDirectory + _T("slideMasters") + FILE_SEPARATOR_STR + _T("_rels") + FILE_SEPARATOR_STR + strSlideMasterRelsFile;
|
||||
}
|
||||
else if (Type == 2)
|
||||
else if (pTheme->m_eType == typeNotesMaster)
|
||||
{
|
||||
oWriter.WriteString(std::wstring(L"</p:notesMaster>"));
|
||||
|
||||
@ -773,7 +797,7 @@ void NSPresentationEditor::CPPTXWriter::WriteTheme(CTheme* pTheme, int & nIndexT
|
||||
strSlideMasterRelsFile = L"notesMaster1.xml.rels";
|
||||
strSlideMasterRelsFile = strPptDirectory + _T("notesMasters") + FILE_SEPARATOR_STR + _T("_rels") + FILE_SEPARATOR_STR + strSlideMasterRelsFile;
|
||||
}
|
||||
else if (Type == 3)
|
||||
else if (pTheme->m_eType == typeHandoutMaster)
|
||||
{
|
||||
oWriter.WriteString(std::wstring(L"</p:handoutMaster>"));
|
||||
|
||||
@ -933,6 +957,11 @@ void NSPresentationEditor::CPPTXWriter::WriteLayout(CLayout& oLayout, int nIndex
|
||||
size_t nElements = oLayout.m_arElements.size();
|
||||
for (size_t nEl = 0; nEl < nElements; ++nEl)
|
||||
{
|
||||
if (!oLayout.m_arElements[nEl])
|
||||
continue;
|
||||
|
||||
oLayout.m_arElements[nEl]->m_lPlaceholderSizePreset = -1;
|
||||
|
||||
WriteElement(oWriter, oRels, oLayout.m_arElements[nEl]);
|
||||
}
|
||||
|
||||
|
||||
@ -72,7 +72,7 @@ namespace NSPresentationEditor
|
||||
void WritePresInfo ();
|
||||
void WriteAll ();
|
||||
void WriteThemes ();
|
||||
void WriteTheme (CTheme* pTheme, int & nIndexTheme, int & nStartLayout, int Type = 1);
|
||||
void WriteTheme (CTheme* pTheme, int & nIndexTheme, int & nStartLayout);
|
||||
void WriteSlides ();
|
||||
void WriteNotes ();
|
||||
void WriteLayout (CLayout& oLayout, int nIndexLayout, int nStartLayout, int nIndexTheme);
|
||||
|
||||
@ -35,6 +35,11 @@
|
||||
#include "../../../ASCOfficeXlsFile2/source/XlsXlsxConverter/ShapeType.h"
|
||||
#include "../../../Common/MS-LCID.h"
|
||||
|
||||
|
||||
#include "../../../ASCOfficePPTXFile/PPTXFormat/Logic/SpTreeElem.h"
|
||||
#include "../../../ASCOfficePPTXFile/PPTXFormat/Logic/Shape.h"
|
||||
#include "../../../ASCOfficePPTXFile/PPTXFormat/Logic/SpTree.h"
|
||||
|
||||
#ifndef EMU_MM
|
||||
#define EMU_MM 36000.0
|
||||
#endif
|
||||
@ -307,6 +312,10 @@ bool NSPresentationEditor::CShapeWriter::SetElement(CElementPtr pElem)
|
||||
m_bWordArt = false;
|
||||
m_bTextBox = false;
|
||||
|
||||
m_xmlGeomAlternative.clear();
|
||||
m_xmlTxBodyAlternative.clear();
|
||||
m_xmlAlternative.clear();
|
||||
|
||||
if (m_pShapeElement)
|
||||
{
|
||||
m_pShapeElement->m_pShape->GetTextRect(m_oTextRect);
|
||||
@ -811,6 +820,13 @@ void NSPresentationEditor::CShapeWriter::WriteTextInfo()
|
||||
{
|
||||
size_t nCount = m_pShapeElement->m_pShape->m_oText.m_arParagraphs.size();
|
||||
|
||||
if (false == m_xmlTxBodyAlternative.empty())
|
||||
{
|
||||
m_oWriter.WriteString(m_xmlTxBodyAlternative);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
m_oWriter.WriteString(std::wstring(L"<p:txBody>"));
|
||||
|
||||
m_oWriter.WriteString(std::wstring(L"<a:bodyPr" ));
|
||||
@ -1227,6 +1243,59 @@ void NSPresentationEditor::CShapeWriter::WriteTextInfo()
|
||||
m_oWriter.WriteString(str5);
|
||||
}
|
||||
|
||||
void NSPresentationEditor::CShapeWriter::ParseXmlAlternative(const std::wstring & xml)
|
||||
{
|
||||
XmlUtils::CXmlLiteReader oReader;
|
||||
|
||||
if ( !oReader.FromString( xml ) )
|
||||
return;
|
||||
if ( !oReader.ReadNextNode() )
|
||||
return;
|
||||
|
||||
std::wstring sName = XmlUtils::GetNameNoNS(oReader.GetName());
|
||||
|
||||
nullable<PPTX::Logic::SpTreeElem> oElement;
|
||||
|
||||
if ( L"graphicFrame" == sName || L"pic" == sName || L"sp" == sName || L"grpSp" == sName || L"cxnSp" == sName || L"AlternateContent" == sName
|
||||
|| L"spTree" )
|
||||
{
|
||||
oElement = oReader;
|
||||
}
|
||||
|
||||
if (oElement.IsInit())
|
||||
{
|
||||
smart_ptr<PPTX::Logic::Shape> shape = oElement->GetElem().smart_dynamic_cast<PPTX::Logic::Shape>();
|
||||
if (shape.IsInit())
|
||||
{
|
||||
NSBinPptxRW::CXmlWriter writer(XMLWRITER_DOC_TYPE_PPTX);
|
||||
shape->spPr.Geometry.toXmlWriter(&writer);
|
||||
|
||||
if (shape->spPr.scene3d.IsInit())
|
||||
shape->spPr.scene3d->toXmlWriter(&writer);
|
||||
|
||||
if (shape->spPr.sp3d.IsInit())
|
||||
shape->spPr.sp3d->toXmlWriter(&writer);
|
||||
|
||||
m_xmlGeomAlternative = writer.GetXmlString();
|
||||
|
||||
writer.ClearNoAttack();
|
||||
if ((shape->txBody.IsInit()) && (shape->txBody->bodyPr.IsInit()) && (shape->txBody->bodyPr->prstTxWarp.IsInit()))
|
||||
{//только WordArt
|
||||
shape->txBody->toXmlWriter(&writer);
|
||||
m_xmlTxBodyAlternative = writer.GetXmlString();
|
||||
}
|
||||
}
|
||||
smart_ptr<PPTX::Logic::SpTree> groupShape = oElement->GetElem().smart_dynamic_cast<PPTX::Logic::SpTree>();
|
||||
if (groupShape.IsInit())
|
||||
{//smartArt
|
||||
NSBinPptxRW::CXmlWriter writer(XMLWRITER_DOC_TYPE_PPTX);
|
||||
groupShape->toXmlWriter(&writer);
|
||||
|
||||
m_xmlAlternative = writer.GetXmlString();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::wstring NSPresentationEditor::CShapeWriter::ConvertShape()
|
||||
{
|
||||
if (m_pImageElement) return ConvertImage();
|
||||
@ -1256,6 +1325,12 @@ std::wstring NSPresentationEditor::CShapeWriter::ConvertShape()
|
||||
m_pShapeElement->m_bLine = true;
|
||||
}
|
||||
|
||||
|
||||
if (m_pShapeElement->m_pShape && !m_pShapeElement->m_pShape->m_strXmlString.empty())
|
||||
{
|
||||
ParseXmlAlternative(m_pShapeElement->m_pShape->m_strXmlString);
|
||||
}
|
||||
|
||||
m_oWriter.WriteString(std::wstring(L"<p:sp>"));
|
||||
|
||||
WriteShapeInfo();
|
||||
@ -1313,30 +1388,37 @@ std::wstring NSPresentationEditor::CShapeWriter::ConvertShape()
|
||||
m_pShapeElement->m_pShape->ToRenderer(dynamic_cast<IRenderer*>(this), oInfo, m_oMetricInfo, 0.0, 1.0);
|
||||
}
|
||||
|
||||
if ((prstGeom.empty() == false || m_pShapeElement->m_bShapePreset) && prstTxWarp.empty() && !shape->m_bCustomShape)
|
||||
if (!m_xmlGeomAlternative.empty())
|
||||
{
|
||||
if (prstGeom.empty()) prstGeom = L"rect";
|
||||
m_oWriter.WriteString(std::wstring(L"<a:prstGeom"));
|
||||
{
|
||||
m_oWriter.WriteString(std::wstring(L" prst=\"") + prstGeom + std::wstring(L"\">"));
|
||||
if (!m_bWordArt)
|
||||
{
|
||||
m_oWriter.WriteString(std::wstring(L"<a:avLst/>"));
|
||||
}
|
||||
}
|
||||
m_oWriter.WriteString(std::wstring(L"</a:prstGeom>"));
|
||||
}
|
||||
else if (prstTxWarp.empty())
|
||||
{
|
||||
m_oWriter.WriteString(m_pShapeElement->ConvertPPTShapeToPPTX());
|
||||
m_oWriter.WriteString(m_xmlGeomAlternative);
|
||||
}
|
||||
else
|
||||
{
|
||||
//word art
|
||||
m_oWriter.WriteString(std::wstring(L"<a:prstGeom prst=\"rect\"/>"));
|
||||
if ((prstGeom.empty() == false || m_pShapeElement->m_bShapePreset) && prstTxWarp.empty() && !shape->m_bCustomShape)
|
||||
{
|
||||
if (prstGeom.empty()) prstGeom = L"rect";
|
||||
m_oWriter.WriteString(std::wstring(L"<a:prstGeom"));
|
||||
{
|
||||
m_oWriter.WriteString(std::wstring(L" prst=\"") + prstGeom + std::wstring(L"\">"));
|
||||
if (!m_bWordArt)
|
||||
{
|
||||
m_oWriter.WriteString(std::wstring(L"<a:avLst/>"));
|
||||
}
|
||||
}
|
||||
m_oWriter.WriteString(std::wstring(L"</a:prstGeom>"));
|
||||
}
|
||||
else if (prstTxWarp.empty())
|
||||
{
|
||||
m_oWriter.WriteString(m_pShapeElement->ConvertPPTShapeToPPTX());
|
||||
}
|
||||
else
|
||||
{
|
||||
//word art
|
||||
m_oWriter.WriteString(std::wstring(L"<a:prstGeom prst=\"rect\"/>"));
|
||||
}
|
||||
}
|
||||
|
||||
if (!m_bWordArt)
|
||||
if (false == m_bWordArt)
|
||||
{
|
||||
m_oWriter.WriteString(ConvertBrush(m_pShapeElement->m_oBrush));
|
||||
if (m_pShapeElement->m_bLine)
|
||||
|
||||
@ -138,6 +138,8 @@ namespace NSPresentationEditor
|
||||
class CShapeWriter : public IRenderer
|
||||
{
|
||||
private:
|
||||
void ParseXmlAlternative(const std::wstring & xml);
|
||||
|
||||
NSPresentationEditor::CStringWriter m_oWriterPath;
|
||||
NSPresentationEditor::CStringWriter m_oWriterVML;
|
||||
NSPresentationEditor::CStringWriter m_oWriter;
|
||||
@ -154,6 +156,10 @@ namespace NSPresentationEditor
|
||||
|
||||
bool m_bWordArt;
|
||||
bool m_bTextBox;
|
||||
|
||||
std::wstring m_xmlGeomAlternative;
|
||||
std::wstring m_xmlTxBodyAlternative;
|
||||
std::wstring m_xmlAlternative;
|
||||
public:
|
||||
|
||||
CShapeWriter();
|
||||
|
||||
@ -1559,7 +1559,7 @@ void CPPTUserInfo::LoadMasters(const LONG& lOriginWidth, const LONG& lOriginHeig
|
||||
{
|
||||
std::map<DWORD, CRecordSlide*>::iterator pPair = m_mapNotesMasters.begin();
|
||||
|
||||
LoadMaster(pPair->second, m_pNotesMasterWrapper, m_pNotesMaster);
|
||||
LoadMaster(typeNotesMaster, pPair->second, m_pNotesMasterWrapper, m_pNotesMaster);
|
||||
}
|
||||
|
||||
LoadHandoutMasterFromPrevUsers(0);
|
||||
@ -1567,10 +1567,10 @@ void CPPTUserInfo::LoadMasters(const LONG& lOriginWidth, const LONG& lOriginHeig
|
||||
{
|
||||
std::map<DWORD, CRecordSlide*>::iterator pPair = m_mapHandoutMasters.begin();
|
||||
|
||||
LoadMaster(pPair->second, m_pHandoutMasterWrapper, m_pHandoutMaster);
|
||||
LoadMaster(typeHandoutMaster, pPair->second, m_pHandoutMasterWrapper, m_pHandoutMaster);
|
||||
}
|
||||
}
|
||||
void CPPTUserInfo::LoadMaster(CRecordSlide* pMaster, CSlideInfo *& pMasterWrapper, CTheme *& pTheme)
|
||||
void CPPTUserInfo::LoadMaster(_typeMaster type, CRecordSlide* pMaster, CSlideInfo *& pMasterWrapper, CTheme *& pTheme)
|
||||
{
|
||||
if (pMaster == NULL)
|
||||
return;
|
||||
@ -1609,7 +1609,7 @@ void CPPTUserInfo::LoadMaster(CRecordSlide* pMaster, CSlideInfo *& pMasterWrappe
|
||||
}
|
||||
}
|
||||
|
||||
pTheme = new CTheme();
|
||||
pTheme = new CTheme(type);
|
||||
|
||||
pTheme->m_lOriginalWidth = lOriginWidth;
|
||||
pTheme->m_lOriginalHeight = lOriginHeight;
|
||||
@ -1970,8 +1970,8 @@ void CPPTUserInfo::LoadNoMainMaster(DWORD dwMasterID, const LONG& lOriginWidth,
|
||||
{
|
||||
if (pElement->m_lPlaceholderID >=0)
|
||||
{
|
||||
if (pElement->m_lPlaceholderType == MasterSlideNumber) pLayout->m_bHasSlideNumber = true;
|
||||
if (pElement->m_lPlaceholderType == MasterDate) pLayout->m_bHasDate = true;
|
||||
if (pElement->m_lPlaceholderType == MasterSlideNumber) pLayout->m_bHasSlideNumber = true;
|
||||
if (pElement->m_lPlaceholderType == MasterDate) pLayout->m_bHasDate = true;
|
||||
if (pElement->m_lPlaceholderType == MasterFooter) pLayout->m_bHasFooter = true;
|
||||
}
|
||||
pLayout->m_mapPlaceholders.insert(std::pair<int, int>(pElement->m_lPlaceholderType, pLayout->m_arElements.size()-1));
|
||||
|
||||
@ -44,6 +44,7 @@ using namespace NSPresentationEditor;
|
||||
class CPPTUserInfo : public CDocument
|
||||
{
|
||||
public:
|
||||
|
||||
CUserEdit m_oUser;
|
||||
std::map<DWORD, DWORD> m_mapOffsetInPIDs;
|
||||
CRecordDocument m_oDocument;
|
||||
@ -145,7 +146,7 @@ public:
|
||||
void LoadNoMainMaster (DWORD dwMasterID, const LONG& lOriginWidth, const LONG& lOriginHeight);
|
||||
void LoadMainMaster (DWORD dwMasterID, const LONG& lOriginWidth, const LONG& lOriginHeight);
|
||||
|
||||
void LoadMaster(CRecordSlide* pMaster, CSlideInfo *& pMasterWrapper, CTheme *& pTheme);
|
||||
void LoadMaster(_typeMaster type, CRecordSlide* pMaster, CSlideInfo *& pMasterWrapper, CTheme *& pTheme);
|
||||
|
||||
void LoadSlideFromPrevUsers (DWORD dwSlideID);
|
||||
void LoadMasterFromPrevUsers (DWORD dwSlideID);
|
||||
|
||||
@ -59,109 +59,7 @@ public:
|
||||
{
|
||||
}
|
||||
|
||||
virtual void ReadFromStream(SRecordHeader & oHeader, POLE::Stream* pStream)
|
||||
{
|
||||
CRecordsContainer::ReadFromStream(oHeader, pStream);
|
||||
virtual void ReadFromStream(SRecordHeader & oHeader, POLE::Stream* pStream);
|
||||
|
||||
// now work out how the group's coordinate system has to be recalculated
|
||||
std::vector<CRecordShapeContainer*> oArrayShapes;
|
||||
GetRecordsByType(&oArrayShapes, false, false);
|
||||
|
||||
if (!oArrayShapes.empty())
|
||||
oArrayShapes[0]->bGroupShape = true;//тут описание самой группы
|
||||
|
||||
int nIndexBreak = -1;
|
||||
for (size_t nIndex = 0; nIndex < oArrayShapes.size(); ++nIndex)
|
||||
{
|
||||
std::vector<CRecordGroupShape*> oArrayGroupShapes;
|
||||
oArrayShapes[nIndex]->GetRecordsByType(&oArrayGroupShapes, false, true);
|
||||
|
||||
if ( oArrayGroupShapes.size() > 0 )
|
||||
{
|
||||
m_rcGroupBounds.left = oArrayGroupShapes[0]->m_oBounds.left;
|
||||
m_rcGroupBounds.top = oArrayGroupShapes[0]->m_oBounds.top;
|
||||
m_rcGroupBounds.right = oArrayGroupShapes[0]->m_oBounds.right;
|
||||
m_rcGroupBounds.bottom = oArrayGroupShapes[0]->m_oBounds.bottom;
|
||||
|
||||
std::vector<CRecordClientAnchor*> oArrayClients;
|
||||
oArrayShapes[nIndex]->GetRecordsByType(&oArrayClients, false, true);
|
||||
|
||||
if ( oArrayClients.size() > 0)
|
||||
{
|
||||
m_rcGroupClientAnchor.left = (LONG)oArrayClients[0]->m_oBounds.Left;
|
||||
m_rcGroupClientAnchor.top = (LONG)oArrayClients[0]->m_oBounds.Top;
|
||||
m_rcGroupClientAnchor.right = (LONG)oArrayClients[0]->m_oBounds.Right;
|
||||
m_rcGroupClientAnchor.bottom = (LONG)oArrayClients[0]->m_oBounds.Bottom;
|
||||
}
|
||||
else
|
||||
{
|
||||
std::vector<CRecordChildAnchor*> oArrayChilds;
|
||||
oArrayShapes[nIndex]->GetRecordsByType(&oArrayChilds, false, true);
|
||||
|
||||
if ( oArrayChilds.size() > 0)
|
||||
{
|
||||
m_rcGroupClientAnchor.left = (LONG)oArrayChilds[0]->m_oBounds.left;
|
||||
m_rcGroupClientAnchor.top = (LONG)oArrayChilds[0]->m_oBounds.top;
|
||||
m_rcGroupClientAnchor.right = (LONG)oArrayChilds[0]->m_oBounds.right;
|
||||
m_rcGroupClientAnchor.bottom = (LONG)oArrayChilds[0]->m_oBounds.bottom;
|
||||
}
|
||||
}
|
||||
|
||||
nIndexBreak = nIndex;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
LONG lW1 = m_rcGroupBounds.right - m_rcGroupBounds.left;
|
||||
LONG lH1 = m_rcGroupBounds.bottom - m_rcGroupBounds.top;
|
||||
LONG lW2 = m_rcGroupClientAnchor.right - m_rcGroupClientAnchor.left;
|
||||
LONG lH2 = m_rcGroupClientAnchor.bottom - m_rcGroupClientAnchor.top;
|
||||
|
||||
bool bIsRecalc = ((lW1 > 0) && (lH1 > 0) && (lW2 > 0) && (lH2 > 0));
|
||||
if (bIsRecalc)
|
||||
{
|
||||
for (size_t nIndex = 0; nIndex < oArrayShapes.size(); ++nIndex)
|
||||
{
|
||||
if (nIndex != nIndexBreak)
|
||||
{
|
||||
oArrayShapes[nIndex]->m_pGroupBounds = &m_rcGroupBounds;
|
||||
oArrayShapes[nIndex]->m_pGroupClientAnchor = &m_rcGroupClientAnchor;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void SetGroupRect()
|
||||
{
|
||||
std::vector<CRecordGroupShapeContainer*> oArrayGroupContainer;
|
||||
this->GetRecordsByType(&oArrayGroupContainer, false, false);
|
||||
|
||||
int nCountGroups = oArrayGroupContainer.size();
|
||||
for (int i = 0; i < nCountGroups; ++i)
|
||||
{
|
||||
LONG lWidthGroup = m_rcGroupBounds.right - m_rcGroupBounds.left;
|
||||
LONG lHeightGroup = m_rcGroupBounds.bottom - m_rcGroupBounds.top;
|
||||
LONG lWidthClient = m_rcGroupClientAnchor.right - m_rcGroupClientAnchor.left;
|
||||
LONG lHeightClient = m_rcGroupClientAnchor.bottom - m_rcGroupClientAnchor.top;
|
||||
|
||||
bool bIsRecalc = ((lWidthClient > 0) && (lHeightClient > 0) && (lWidthGroup > 0) && (lHeightGroup > 0));
|
||||
|
||||
if (bIsRecalc)
|
||||
{
|
||||
// здесь переводим координаты, чтобы они не зависили от группы
|
||||
double dScaleX = (double)(lWidthClient) / (lWidthGroup);
|
||||
double dScaleY = (double)(lHeightClient) / (lHeightGroup);
|
||||
|
||||
RECT* prcChildAnchor = &oArrayGroupContainer[i]->m_rcGroupClientAnchor;
|
||||
|
||||
prcChildAnchor->left = m_rcGroupClientAnchor.left + (LONG)(dScaleX * (prcChildAnchor->left - m_rcGroupBounds.left));
|
||||
prcChildAnchor->right = m_rcGroupClientAnchor.left + (LONG)(dScaleX * (prcChildAnchor->right - m_rcGroupBounds.left));
|
||||
|
||||
prcChildAnchor->top = m_rcGroupClientAnchor.top + (LONG)(dScaleY * (prcChildAnchor->top - m_rcGroupBounds.top));
|
||||
prcChildAnchor->bottom = m_rcGroupClientAnchor.top + (LONG)(dScaleY * (prcChildAnchor->bottom - m_rcGroupBounds.top));
|
||||
}
|
||||
|
||||
oArrayGroupContainer[i]->SetGroupRect();
|
||||
}
|
||||
}
|
||||
void SetGroupRect();
|
||||
};
|
||||
|
||||
2405
ASCOfficePPTFile/PPTFormatLib/Records/Drawing/ShapeContainer.cpp
Normal file
2405
ASCOfficePPTFile/PPTFormatLib/Records/Drawing/ShapeContainer.cpp
Normal file
File diff suppressed because it is too large
Load Diff
2238
ASCOfficePPTFile/PPTFormatLib/Records/Drawing/ShapeContainer.h
Normal file → Executable file
2238
ASCOfficePPTFile/PPTFormatLib/Records/Drawing/ShapeContainer.h
Normal file → Executable file
File diff suppressed because it is too large
Load Diff
@ -341,138 +341,6 @@
|
||||
<References>
|
||||
</References>
|
||||
<Files>
|
||||
<Filter
|
||||
Name="Common"
|
||||
>
|
||||
<File
|
||||
RelativePath="..\..\..\ASCOfficeXlsFile2\source\XlsFormat\Binary\CFRecord.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\ASCOfficeXlsFile2\source\XlsFormat\Binary\CFStream.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\XlsxSerializerCom\Reader\ChartFromToBinary.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\ASCOfficeDocxFile2\DocWrapper\ChartWriter.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\XlsxSerializerCom\Common\Common.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\XlsxSerializerCom\Reader\CommonWriter.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\XlsxSerializerCom\Reader\CSVReader.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\XlsxSerializerCom\Writer\CSVWriter.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\ASCOfficeDocxFile2\DocWrapper\DocxSerializer.cpp"
|
||||
>
|
||||
<FileConfiguration
|
||||
Name="Debug|Win32"
|
||||
>
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
AdditionalOptions="/bigobj"
|
||||
/>
|
||||
</FileConfiguration>
|
||||
<FileConfiguration
|
||||
Name="Debug|x64"
|
||||
>
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
AdditionalOptions="/bigobj"
|
||||
/>
|
||||
</FileConfiguration>
|
||||
<FileConfiguration
|
||||
Name="Release|x64"
|
||||
>
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
AdditionalOptions="/bigobj"
|
||||
/>
|
||||
</FileConfiguration>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\Common\FileDownloader\FileDownloader_win.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\DesktopEditor\fontengine\FontManager.cpp"
|
||||
>
|
||||
<FileConfiguration
|
||||
Name="Release|x64"
|
||||
>
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
AdditionalOptions="/bigobj"
|
||||
/>
|
||||
</FileConfiguration>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\ASCOfficeDocxFile2\DocWrapper\FontProcessor.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\UnicodeConverter\UnicodeConverter.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\ASCOfficeDocxFile2\DocWrapper\XlsxSerializer.cpp"
|
||||
>
|
||||
<FileConfiguration
|
||||
Name="Debug|Win32"
|
||||
>
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
AdditionalOptions="/bigobj"
|
||||
/>
|
||||
</FileConfiguration>
|
||||
<FileConfiguration
|
||||
Name="Debug|x64"
|
||||
>
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
AdditionalOptions="/bigobj"
|
||||
/>
|
||||
</FileConfiguration>
|
||||
<FileConfiguration
|
||||
Name="Release|x64"
|
||||
>
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
AdditionalOptions="/bigobj"
|
||||
/>
|
||||
</FileConfiguration>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\ASCOfficePPTXFile\Editor\xmlwriter.h"
|
||||
>
|
||||
</File>
|
||||
<Filter
|
||||
Name="pole"
|
||||
>
|
||||
<File
|
||||
RelativePath="..\..\..\Common\3dParty\pole\pole.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\Common\3dParty\pole\pole.h"
|
||||
>
|
||||
</File>
|
||||
</Filter>
|
||||
</Filter>
|
||||
<Filter
|
||||
Name="Reader"
|
||||
>
|
||||
@ -935,6 +803,10 @@
|
||||
RelativePath="..\Records\Drawing\Shape.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\Records\Drawing\ShapeContainer.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\Records\Drawing\ShapeContainer.h"
|
||||
>
|
||||
|
||||
@ -68,14 +68,6 @@ EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libpsd", "..\..\DesktopEditor\cximage\libpsd\libpsd_vs2005.vcproj", "{9A037A69-D1DF-4505-AB2A-6CB3641C476E}"
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "PPTFormatLib", "..\PPTFormatLib\Win32\PPTFormatLib.vcproj", "{7B27E40E-F70A-4A74-A77C-0944D7931D15}"
|
||||
ProjectSection(ProjectDependencies) = postProject
|
||||
{21663823-DE45-479B-91D0-B4FEF4916EF0} = {21663823-DE45-479B-91D0-B4FEF4916EF0}
|
||||
{9CAA294E-58C3-4CEB-ABA0-CB9786CA5540} = {9CAA294E-58C3-4CEB-ABA0-CB9786CA5540}
|
||||
{37CA072A-5BDE-498B-B3A7-5E404F5F9BF2} = {37CA072A-5BDE-498B-B3A7-5E404F5F9BF2}
|
||||
{C739151F-5384-41DF-A1A6-F089E2C1AD56} = {C739151F-5384-41DF-A1A6-F089E2C1AD56}
|
||||
{A100103A-353E-45E8-A9B8-90B87CC5C0B0} = {A100103A-353E-45E8-A9B8-90B87CC5C0B0}
|
||||
{36636678-AE25-4BE6-9A34-2561D1BCF302} = {36636678-AE25-4BE6-9A34-2561D1BCF302}
|
||||
EndProjectSection
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "jbig", "..\..\DesktopEditor\cximage\jbig\jbig_vs2005.vcproj", "{764C3A2D-FB0F-428E-B1C7-62D1DD2CE239}"
|
||||
EndProject
|
||||
@ -85,15 +77,15 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "OfficeUtilsLib", "..\..\Off
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "PptFormatTest", "PptFormatTest.vcproj", "{0F49D5D1-A8D3-4F97-8BC1-E2F65BB00C10}"
|
||||
ProjectSection(ProjectDependencies) = postProject
|
||||
{7B27E40E-F70A-4A74-A77C-0944D7931D15} = {7B27E40E-F70A-4A74-A77C-0944D7931D15}
|
||||
{3423EC9A-52E4-4A4D-9753-EDEBC38785EF} = {3423EC9A-52E4-4A4D-9753-EDEBC38785EF}
|
||||
{C27E9A9F-3A17-4482-9C5F-BF15C01E747C} = {C27E9A9F-3A17-4482-9C5F-BF15C01E747C}
|
||||
{37CA072A-5BDE-498B-B3A7-5E404F5F9BF2} = {37CA072A-5BDE-498B-B3A7-5E404F5F9BF2}
|
||||
{9CAA294E-58C3-4CEB-ABA0-CB9786CA5540} = {9CAA294E-58C3-4CEB-ABA0-CB9786CA5540}
|
||||
{36636678-AE25-4BE6-9A34-2561D1BCF302} = {36636678-AE25-4BE6-9A34-2561D1BCF302}
|
||||
{F8274B05-168E-4D6E-B843-AA7510725363} = {F8274B05-168E-4D6E-B843-AA7510725363}
|
||||
{21663823-DE45-479B-91D0-B4FEF4916EF0} = {21663823-DE45-479B-91D0-B4FEF4916EF0}
|
||||
{3423EC9A-52E4-4A4D-9753-EDEBC38785EF} = {3423EC9A-52E4-4A4D-9753-EDEBC38785EF}
|
||||
{77DDC8D7-5B12-4FF2-9629-26AEBCA8436D} = {77DDC8D7-5B12-4FF2-9629-26AEBCA8436D}
|
||||
{C27E9A9F-3A17-4482-9C5F-BF15C01E747C} = {C27E9A9F-3A17-4482-9C5F-BF15C01E747C}
|
||||
{36636678-AE25-4BE6-9A34-2561D1BCF302} = {36636678-AE25-4BE6-9A34-2561D1BCF302}
|
||||
{9CAA294E-58C3-4CEB-ABA0-CB9786CA5540} = {9CAA294E-58C3-4CEB-ABA0-CB9786CA5540}
|
||||
{A100103A-353E-45E8-A9B8-90B87CC5C0B0} = {A100103A-353E-45E8-A9B8-90B87CC5C0B0}
|
||||
{37CA072A-5BDE-498B-B3A7-5E404F5F9BF2} = {37CA072A-5BDE-498B-B3A7-5E404F5F9BF2}
|
||||
{7B27E40E-F70A-4A74-A77C-0944D7931D15} = {7B27E40E-F70A-4A74-A77C-0944D7931D15}
|
||||
EndProjectSection
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libxml2", "..\..\DesktopEditor\xml\build\vs2005\libxml2.vcproj", "{21663823-DE45-479B-91D0-B4FEF4916EF0}"
|
||||
|
||||
@ -44,7 +44,7 @@
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
Optimization="0"
|
||||
AdditionalIncludeDirectories="../../DesktopEditor/xml/build/vs2005;../../DesktopEditor/xml/libxml2/include"
|
||||
AdditionalIncludeDirectories="../../DesktopEditor/xml/build/vs2005;../../DesktopEditor/xml/libxml2/include;"../../DesktopEditor/freetype-2.5.2/include""
|
||||
PreprocessorDefinitions="_DEBUG;_CONSOLE;_USE_MATH_DEFINES;_USE_LIBXML2_READER_;LIBXML_READER_ENABLED;USE_LITE_READER;_USE_XMLLITE_READER_;_PRESENTATION_WRITER_;_SVG_CONVERT_TO_IMAGE_;DONT_WRITE_EMBEDDED_FONTS"
|
||||
MinimalRebuild="true"
|
||||
BasicRuntimeChecks="3"
|
||||
|
||||
@ -82,6 +82,7 @@ public:
|
||||
LONG m_lDrawType;
|
||||
|
||||
std::wstring m_strPPTXShape;
|
||||
std::wstring m_strXmlString; //alternative
|
||||
|
||||
NSBaseShape::ClassType m_classType;
|
||||
|
||||
|
||||
@ -39,7 +39,14 @@ typedef std::vector<std::wstring> vector_string;
|
||||
|
||||
namespace NSPresentationEditor
|
||||
{
|
||||
static void CorrectColorPPT(LONG& lSchemeIndex)
|
||||
// Selects the kind of master a theme is created for (stored in CTheme::m_eType
// and passed to LoadMaster). The numeric values are spelled out but are the
// same ones the compiler assigned implicitly before.
enum _typeMaster
{
	typeMaster			= 0,	// default argument of CTheme's constructor
	typeNotesMaster		= 1,	// presumably the notes master (inferred from the name)
	typeHandoutMaster	= 2		// presumably the handout master (inferred from the name)
};
|
||||
static void CorrectColorPPT(LONG& lSchemeIndex)
|
||||
{
|
||||
//0x00 //Background color
|
||||
//0x01 //Text color
|
||||
|
||||
@ -40,6 +40,7 @@ namespace NSPresentationEditor
|
||||
class CTheme
|
||||
{
|
||||
public:
|
||||
_typeMaster m_eType;
|
||||
std::map<_UINT64, LONG> m_mapGeomToLayout;// типовые шаблоны
|
||||
std::map<DWORD, LONG> m_mapTitleLayout; // заголовочные шаблоны
|
||||
|
||||
@ -77,8 +78,8 @@ namespace NSPresentationEditor
|
||||
vector_string m_PlaceholdersReplaceString[3]; //0-dates, 1 - headers, 2 - footers
|
||||
|
||||
//------------------------------------------------------------------------------------
|
||||
CTheme() : m_arColorScheme(), m_arFonts(), m_arBrushes(),
|
||||
m_arPens(), m_arEffects(), m_arLayouts()
|
||||
CTheme(_typeMaster type = typeMaster) : m_arColorScheme(), m_arFonts(), m_arBrushes(),
|
||||
m_arPens(), m_arEffects(), m_arLayouts(), m_eType(type)
|
||||
{
|
||||
Clear();
|
||||
}
|
||||
@ -90,6 +91,7 @@ namespace NSPresentationEditor
|
||||
|
||||
CTheme& operator=(const CTheme& oSrc)
|
||||
{
|
||||
m_eType = oSrc.m_eType;
|
||||
m_arColorScheme = oSrc.m_arColorScheme;
|
||||
m_arFonts = oSrc.m_arFonts;
|
||||
m_arBrushes = oSrc.m_arBrushes;
|
||||
@ -106,7 +108,10 @@ namespace NSPresentationEditor
|
||||
m_bHasFooter = oSrc.m_bHasFooter;
|
||||
m_nFormatDate = oSrc.m_nFormatDate;
|
||||
|
||||
for (size_t i = 0 ; i < 3 ; i++) m_PlaceholdersReplaceString[i] = oSrc.m_PlaceholdersReplaceString[i];
|
||||
for (size_t i = 0 ; i < 3 ; i++)
|
||||
{
|
||||
m_PlaceholdersReplaceString[i] = oSrc.m_PlaceholdersReplaceString[i];
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < oSrc.m_arExtraColorScheme.size(); ++i)
|
||||
{
|
||||
|
||||
@ -341,7 +341,32 @@ namespace PPTX
|
||||
{
|
||||
pReader->Seek(pReader->GetPos() - 4); //roll back to size record
|
||||
std::wstring sXmlContent;
|
||||
pReader->m_pMainDocument->getXmlContentElem(OOX::et_m_oMathPara, *pReader, sXmlContent);
|
||||
|
||||
if (pReader->m_pMainDocument)
|
||||
{
|
||||
pReader->m_pMainDocument->getXmlContentElem(OOX::et_m_oMathPara, *pReader, sXmlContent);
|
||||
}
|
||||
else
|
||||
{
|
||||
BinDocxRW::CDocxSerializer oDocxSerializer;
|
||||
NSBinPptxRW::CDrawingConverter oDrawingConverter;
|
||||
|
||||
NSBinPptxRW::CBinaryFileReader* old_reader = oDrawingConverter.m_pReader;
|
||||
NSBinPptxRW::CRelsGenerator* old_rels = pReader->m_pRels;
|
||||
|
||||
oDrawingConverter.m_pReader = pReader;
|
||||
pReader->m_pRels = new NSBinPptxRW::CRelsGenerator();
|
||||
|
||||
oDrawingConverter.SetMainDocument(&oDocxSerializer);
|
||||
|
||||
oDocxSerializer.m_pCurFileWriter = new Writers::FileWriter(sDstEmbeddedTemp, L"", false, 111, false, &oDrawingConverter, L"");
|
||||
|
||||
oDocxSerializer.getXmlContentElem(OOX::et_m_oMathPara, *pReader, sXmlContent);
|
||||
|
||||
pReader->m_pRels = old_rels;
|
||||
oDrawingConverter.m_pReader = old_reader;
|
||||
pReader->m_pMainDocument = NULL;
|
||||
}
|
||||
|
||||
if (!sXmlContent.empty())
|
||||
{
|
||||
|
||||
@ -776,7 +776,7 @@ public:
|
||||
sResult.erase(outsize_with_0 - 1);
|
||||
ansi = false;
|
||||
}
|
||||
#else
|
||||
#elif defined(__linux__)
|
||||
std::string sCodepage = "CP" + std::to_string(nCodepage);
|
||||
|
||||
iconv_t ic= iconv_open("WCHAR_T", sCodepage.c_str());
|
||||
|
||||
@ -70,6 +70,13 @@ namespace XLS
|
||||
L"333399", L"000000", L"FFFFFF", L"FF0000", L"00FF00", L"0000FF", L"FFFF00", L"FF00FF", L"00FFFF", L"800000",
|
||||
L"800000", L"800000", L"808000", L"800080", L"808000", L"C0C0C0", L"808080", L"9999FF", L"993366", L"FFFFCC",
|
||||
L"CCFFFF", L"660066", L"FF8080", L"0066CC", L"CCCCFF",
|
||||
//TODO: check which values MS uses from this point on
|
||||
L"800000", L"FF00FF", L"FFFF00", L"00FFFF", L"800080", L"800000", L"808000", L"0000FF", L"00CCFF", L"CCFFFF",
|
||||
L"CCFFCC", L"FFFF99", L"99CCFF", L"FF99CC", L"CC99FF", L"FFCC99", L"3366FF", L"33CCCC", L"99CC00", L"FFCC00",
|
||||
L"FF9900", L"FF6600", L"666699", L"969696", L"336600", L"339966", L"330000", L"333300", L"993300", L"993366",
|
||||
L"333399", L"000000", L"FFFFFF", L"FF0000", L"00FF00", L"0000FF", L"FFFF00", L"FF00FF", L"00FFFF", L"800000",
|
||||
L"800000", L"800000", L"808000", L"800080", L"808000", L"C0C0C0", L"808080", L"9999FF", L"993366", L"FFFFCC",
|
||||
L"CCFFFF", L"660066", L"FF8080", L"0066CC", L"CCCCFF"
|
||||
};
|
||||
|
||||
|
||||
|
||||
4
CHANGELOG.md
Normal file
4
CHANGELOG.md
Normal file
@ -0,0 +1,4 @@
|
||||
# Change log
|
||||
## 5.1.1
|
||||
### x2t
|
||||
*
|
||||
@ -51,6 +51,7 @@ SOURCES += \
|
||||
../Source/DocxFormat/Logic/TableProperty.cpp \
|
||||
../Source/DocxFormat/Logic/Vml.cpp \
|
||||
../Source/DocxFormat/Media/VbaProject.cpp \
|
||||
../Source/DocxFormat/Media/JsaProject.cpp \
|
||||
../Source/DocxFormat/Math/oMath.cpp \
|
||||
../Source/DocxFormat/Math/oMathContent.cpp \
|
||||
../Source/DocxFormat/Math/oMathPara.cpp \
|
||||
|
||||
@ -49,6 +49,7 @@
|
||||
#include "../Source/DocxFormat/Math/oMathContent.cpp"
|
||||
#include "../Source/DocxFormat/Math/oMathPara.cpp"
|
||||
#include "../Source/DocxFormat/Media/VbaProject.cpp"
|
||||
#include "../Source/DocxFormat/Media/JsaProject.cpp"
|
||||
#include "../Source/DocxFormat/Docx.cpp"
|
||||
#include "../Source/DocxFormat/FileFactory.cpp"
|
||||
#include "../Source/DocxFormat/IFileContainer.cpp"
|
||||
|
||||
@ -214,6 +214,8 @@
|
||||
690FE0831E9BBA23004B26D0 /* DiagramDrawing.h in Headers */ = {isa = PBXBuildFile; fileRef = 690FE0811E9BBA23004B26D0 /* DiagramDrawing.h */; };
|
||||
691C3E131F20C3D500F1775E /* File.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 691C3E121F20C3D500F1775E /* File.cpp */; };
|
||||
69B3ADA920120093000EC6A7 /* VmlDrawing.h in Headers */ = {isa = PBXBuildFile; fileRef = 69B3ADA820120093000EC6A7 /* VmlDrawing.h */; };
|
||||
69DE2217206E24A700A07A0E /* JsaProject.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 69DE2215206E24A700A07A0E /* JsaProject.cpp */; };
|
||||
69DE2218206E24A700A07A0E /* JsaProject.h in Headers */ = {isa = PBXBuildFile; fileRef = 69DE2216206E24A700A07A0E /* JsaProject.h */; };
|
||||
69E6AC872031AB0C00795D9D /* Xlsx.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 69E6AC862031AB0C00795D9D /* Xlsx.cpp */; };
|
||||
69E6AC892031AC3500795D9D /* SheetData.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 69E6AC882031AC3500795D9D /* SheetData.cpp */; };
|
||||
69E6AC8C2031ACA900795D9D /* VbaProject.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 69E6AC8A2031ACA900795D9D /* VbaProject.cpp */; };
|
||||
@ -438,6 +440,8 @@
|
||||
690FE0811E9BBA23004B26D0 /* DiagramDrawing.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = DiagramDrawing.h; sourceTree = "<group>"; };
|
||||
691C3E121F20C3D500F1775E /* File.cpp */ = {isa = PBXFileReference; explicitFileType = sourcecode.cpp.objcpp.preprocessed; fileEncoding = 4; path = File.cpp; sourceTree = "<group>"; };
|
||||
69B3ADA820120093000EC6A7 /* VmlDrawing.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = VmlDrawing.h; sourceTree = "<group>"; };
|
||||
69DE2215206E24A700A07A0E /* JsaProject.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = JsaProject.cpp; sourceTree = "<group>"; };
|
||||
69DE2216206E24A700A07A0E /* JsaProject.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = JsaProject.h; sourceTree = "<group>"; };
|
||||
69E6AC862031AB0C00795D9D /* Xlsx.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = Xlsx.cpp; sourceTree = "<group>"; };
|
||||
69E6AC882031AC3500795D9D /* SheetData.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = SheetData.cpp; sourceTree = "<group>"; };
|
||||
69E6AC8A2031ACA900795D9D /* VbaProject.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = VbaProject.cpp; sourceTree = "<group>"; };
|
||||
@ -706,6 +710,8 @@
|
||||
17E6A0891AC4262700F28F8B /* Video.h */,
|
||||
69E6AC8A2031ACA900795D9D /* VbaProject.cpp */,
|
||||
69E6AC8B2031ACA900795D9D /* VbaProject.h */,
|
||||
69DE2215206E24A700A07A0E /* JsaProject.cpp */,
|
||||
69DE2216206E24A700A07A0E /* JsaProject.h */,
|
||||
);
|
||||
path = Media;
|
||||
sourceTree = "<group>";
|
||||
@ -1072,6 +1078,7 @@
|
||||
17C1FC711ACC4250006B99B3 /* Colors.h in Headers */,
|
||||
17C1FC721ACC4250006B99B3 /* Pos.h in Headers */,
|
||||
17C1FC731ACC4250006B99B3 /* CommonInclude.h in Headers */,
|
||||
69DE2218206E24A700A07A0E /* JsaProject.h in Headers */,
|
||||
17C1FC741ACC4250006B99B3 /* WebSettings.h in Headers */,
|
||||
17C1FC751ACC4250006B99B3 /* CustomXml.h in Headers */,
|
||||
17C1FC771ACC4250006B99B3 /* IFileContainer.h in Headers */,
|
||||
@ -1189,6 +1196,7 @@
|
||||
17C1FBB21ACC4250006B99B3 /* Vml.cpp in Sources */,
|
||||
17C1FBB31ACC4250006B99B3 /* unicode_util.cpp in Sources */,
|
||||
17C1FBB41ACC4250006B99B3 /* FldSimple.cpp in Sources */,
|
||||
69DE2217206E24A700A07A0E /* JsaProject.cpp in Sources */,
|
||||
69E6AC872031AB0C00795D9D /* Xlsx.cpp in Sources */,
|
||||
17C1FBB91ACC4250006B99B3 /* Paragraph.cpp in Sources */,
|
||||
17C1FBBA1ACC4250006B99B3 /* SimpleTypes_Word.cpp in Sources */,
|
||||
|
||||
@ -1197,6 +1197,14 @@
|
||||
RelativePath="..\Source\DocxFormat\Media\Image.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\Source\DocxFormat\Media\JsaProject.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\Source\DocxFormat\Media\JsaProject.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\Source\DocxFormat\Media\Media.h"
|
||||
>
|
||||
|
||||
112
Common/DocxFormat/Source/DocxFormat/Media/JsaProject.cpp
Normal file
112
Common/DocxFormat/Source/DocxFormat/Media/JsaProject.cpp
Normal file
@ -0,0 +1,112 @@
|
||||
/*
|
||||
* (c) Copyright Ascensio System SIA 2010-2018
|
||||
*
|
||||
* This program is a free software product. You can redistribute it and/or
|
||||
* modify it under the terms of the GNU Affero General Public License (AGPL)
|
||||
* version 3 as published by the Free Software Foundation. In accordance with
|
||||
* Section 7(a) of the GNU AGPL its Section 15 shall be amended to the effect
|
||||
* that Ascensio System SIA expressly excludes the warranty of non-infringement
|
||||
* of any third-party rights.
|
||||
*
|
||||
* This program is distributed WITHOUT ANY WARRANTY; without even the implied
|
||||
* warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. For
|
||||
* details, see the GNU AGPL at: http://www.gnu.org/licenses/agpl-3.0.html
|
||||
*
|
||||
* You can contact Ascensio System SIA at Lubanas st. 125a-25, Riga, Latvia,
|
||||
* EU, LV-1021.
|
||||
*
|
||||
* The interactive user interfaces in modified source and object code versions
|
||||
* of the Program must display Appropriate Legal Notices, as required under
|
||||
* Section 5 of the GNU AGPL version 3.
|
||||
*
|
||||
* Pursuant to Section 7(b) of the License you must retain the original Product
|
||||
* logo when distributing the program. Pursuant to Section 7(e) we decline to
|
||||
* grant you any rights under trademark law for use of our trademarks.
|
||||
*
|
||||
* All the Product's GUI elements, including illustrations and icon sets, as
|
||||
* well as technical writing content are licensed under the terms of the
|
||||
* Creative Commons Attribution-ShareAlike 4.0 International. See the License
|
||||
* terms at http://creativecommons.org/licenses/by-sa/4.0/legalcode
|
||||
*
|
||||
*/
|
||||
#include "JsaProject.h"
|
||||
|
||||
#include "../Docx.h"
|
||||
#include "../../XlsxFormat/Xlsx.h"
|
||||
|
||||
#include "Media.h"
|
||||
#include "../../../../../ASCOfficePPTXFile/Editor/BinaryFileReaderWriter.h"
|
||||
#include "../../../../../ASCOfficePPTXFile/Editor/imagemanager.h"
|
||||
|
||||
#include "../IFileContainer.h"
|
||||
#include "../../XlsxFormat/FileTypes_Spreadsheet.h"
|
||||
|
||||
|
||||
namespace OOX
|
||||
{
|
||||
// Creates a JSA project part without reading any file and publishes it on the
// owning document: m_pJsaProject of the CDocx is set when pMain is a CDocx,
// otherwise m_pJsaProject of the CXlsx when pMain is a CXlsx. Any other
// document type (or NULL) leaves nothing registered.
JsaProject::JsaProject( OOX::Document *pMain ) : Media(pMain)
{
	if (OOX::CDocx* pDocx = dynamic_cast<OOX::CDocx*>(pMain))
	{
		pDocx->m_pJsaProject = this;
		return;
	}

	if (OOX::Spreadsheet::CXlsx* pXlsx = dynamic_cast<OOX::Spreadsheet::CXlsx*>(pMain))
	{
		pXlsx->m_pJsaProject = this;
	}
}
|
||||
// Creates a JSA project part, publishes it on the owning document (CDocx first,
// CXlsx as the fallback; nothing for any other type) and then reads `filename`.
// Registration happens before read(), exactly as in the original ordering.
JsaProject::JsaProject(OOX::Document *pMain, const CPath& filename) : Media(pMain)
{
	if (OOX::CDocx* pDocx = dynamic_cast<OOX::CDocx*>(pMain))
	{
		pDocx->m_pJsaProject = this;
	}
	else if (OOX::Spreadsheet::CXlsx* pXlsx = dynamic_cast<OOX::Spreadsheet::CXlsx*>(pMain))
	{
		pXlsx->m_pJsaProject = this;
	}

	read(filename);
}
|
||||
|
||||
// Serialises the project file into the binary writer: the whole file at
// m_filename is read into memory and appended with WriteBYTEArray.
// Nothing is written when the file cannot be read.
void JsaProject::toPPTY(NSBinPptxRW::CBinaryFileWriter* pWriter) const
{
	BYTE* pData = NULL;
	DWORD nBytesCount = 0;	// was uninitialised; keep it defined even if the read fails

	if (NSFile::CFileBinary::ReadAllBytes(m_filename.GetPath(), &pData, nBytesCount))
	{
		pWriter->WriteBYTEArray(pData, nBytesCount);

		// The buffer handed back by ReadAllBytes was never released before (leak on
		// every call). NOTE(review): assumes ReadAllBytes allocates with new[] and
		// that WriteBYTEArray copies the bytes rather than keeping the pointer --
		// confirm against their definitions.
		delete[] pData;
		pData = NULL;
	}
}
|
||||
// Restores the project file from the binary reader.
// Record layout as consumed here: a LONG length followed by `length` raw bytes.
// The bytes are written to <DstFolder>/<default JsaProject file name>, the
// file's extension is added to the content types, and the part's filename is
// set to the new path. The reader is always left at the end of the record.
void JsaProject::fromPPTY(NSBinPptxRW::CBinaryFileReader* pReader)
{
	LONG _length = pReader->GetLong();
	LONG _end_rec = pReader->GetPos() + _length;

	BYTE* pData = NULL;
	if (_length > 0)
	{
		pData = pReader->GetPointer(_length);
	}

	// A null payload pointer is treated as "no payload" instead of being passed
	// to WriteFile. NOTE(review): GetPointer's failure behaviour is not visible
	// here; the guard is harmless if it never returns NULL.
	if (pData)
	{
		std::wstring filePath = pReader->m_pRels->m_pManager->GetDstFolder() + FILE_SEPARATOR_STR + OOX::FileTypes::JsaProject.DefaultFileName().GetPath();

		NSFile::CFileBinary oFile;
		oFile.CreateFileW(filePath);
		oFile.WriteFile(pData, _length);
		oFile.CloseFile();

		pReader->m_pRels->m_pManager->m_pContentTypes->AddDefault(OOX::FileTypes::JsaProject.DefaultFileName().GetExtention(false));

		set_filename(filePath, false);
	}

	// skip to the end of the record whether or not anything was extracted
	pReader->Seek(_end_rec);
}
|
||||
|
||||
} // namespace OOX
|
||||
@ -34,6 +34,9 @@
|
||||
#define OOX_ONLY_JSA_PROJECT_INCLUDE_H_
|
||||
|
||||
#include "Media.h"
|
||||
#include "../../../../../ASCOfficePPTXFile/Editor/BinaryFileReaderWriter.h"
|
||||
#include "../../../../../ASCOfficePPTXFile/Editor/imagemanager.h"
|
||||
|
||||
#include "../../XlsxFormat/FileTypes_Spreadsheet.h"
|
||||
|
||||
namespace OOX
|
||||
@ -41,13 +44,8 @@ namespace OOX
|
||||
class JsaProject : public Media
|
||||
{
|
||||
public:
|
||||
JsaProject( OOX::Document *pMain ) : Media(pMain)
|
||||
{
|
||||
}
|
||||
JsaProject(OOX::Document *pMain, const CPath& filename) : Media(pMain)
|
||||
{
|
||||
read(filename);
|
||||
}
|
||||
JsaProject( OOX::Document *pMain );
|
||||
JsaProject(OOX::Document *pMain, const CPath& filename);
|
||||
virtual ~JsaProject()
|
||||
{
|
||||
}
|
||||
@ -63,36 +61,8 @@ namespace OOX
|
||||
{
|
||||
return type().DefaultFileName();
|
||||
}
|
||||
virtual void toPPTY(NSBinPptxRW::CBinaryFileWriter* pWriter) const
|
||||
{
|
||||
BYTE* pData = NULL;
|
||||
DWORD nBytesCount;
|
||||
if(NSFile::CFileBinary::ReadAllBytes(m_filename.GetPath(), &pData, nBytesCount))
|
||||
{
|
||||
pWriter->WriteBYTEArray(pData, nBytesCount);
|
||||
}
|
||||
}
|
||||
virtual void fromPPTY(NSBinPptxRW::CBinaryFileReader* pReader)
|
||||
{
|
||||
LONG _length = pReader->GetLong();
|
||||
LONG _end_rec = pReader->GetPos() + _length;
|
||||
|
||||
if (_length > 0)
|
||||
{
|
||||
BYTE* pData = pReader->GetPointer(_length);
|
||||
std::wstring filePath = pReader->m_pRels->m_pManager->GetDstFolder() + FILE_SEPARATOR_STR + OOX::FileTypes::JsaProject.DefaultFileName().GetPath();
|
||||
|
||||
NSFile::CFileBinary oFile;
|
||||
oFile.CreateFileW(filePath);
|
||||
oFile.WriteFile(pData, _length);
|
||||
oFile.CloseFile();
|
||||
|
||||
pReader->m_pRels->m_pManager->m_pContentTypes->AddDefault(OOX::FileTypes::JsaProject.DefaultFileName().GetExtention(false));
|
||||
|
||||
set_filename(filePath, false);
|
||||
}
|
||||
pReader->Seek(_end_rec);
|
||||
}
|
||||
virtual void toPPTY(NSBinPptxRW::CBinaryFileWriter* pWriter) const;
|
||||
virtual void fromPPTY(NSBinPptxRW::CBinaryFileReader* pReader);
|
||||
};
|
||||
} // namespace OOX
|
||||
|
||||
|
||||
@ -72,6 +72,8 @@ public:
|
||||
bool isPptFormatFile (POLE::Storage *storage);
|
||||
bool isMS_OFFCRYPTOFormatFile (POLE::Storage * storage);
|
||||
|
||||
bool isDocFlatFormatFile (unsigned char* pBuffer,int dwBytes);
|
||||
|
||||
bool isRtfFormatFile (unsigned char* pBuffer,int dwBytes);
|
||||
bool isHtmlFormatFile (unsigned char* pBuffer,int dwBytes, bool testCloseTag);
|
||||
bool isPdfFormatFile (unsigned char* pBuffer,int dwBytes);
|
||||
|
||||
@ -144,20 +144,29 @@ bool COfficeFileFormatChecker::isDocFormatFile (POLE::Storage * storage)
|
||||
|
||||
POLE::Stream stream(storage, L"WordDocument");
|
||||
|
||||
unsigned char buffer[10];
|
||||
if (stream.read(buffer,10) > 0)
|
||||
unsigned char buffer[64];
|
||||
if (stream.read(buffer, 64) > 0)
|
||||
{
|
||||
//ms office 2007 encrypted contains stream WordDocument !!
|
||||
std::list<std::wstring> entries = storage->entries(L"DataSpaces");
|
||||
if (entries.size() > 0)
|
||||
return false;
|
||||
|
||||
#if defined FILE_FORMAT_CHECKER_WITH_MACRO
|
||||
|
||||
if ((buffer[0] == 0xEC && buffer[1] == 0xA5) || // word 1997-2003
|
||||
(buffer[0] == 0xDC && buffer[1] == 0xA5) || // word 1995
|
||||
(buffer[0] == 0xDB && buffer[1] == 0xA5)) // word 2.0
|
||||
{
|
||||
nFileType = AVS_OFFICESTUDIO_FILE_DOCUMENT_DOC;
|
||||
}
|
||||
else if ( isHtmlFormatFile(buffer, 64, false) )
|
||||
{
|
||||
nFileType = AVS_OFFICESTUDIO_FILE_DOCUMENT_HTML_IN_CONTAINER;
|
||||
return true;
|
||||
}
|
||||
if (storage->isDirectory(L"Macros"))
|
||||
{
|
||||
bMacroEnabled = true;
|
||||
}
|
||||
#endif
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -193,14 +202,23 @@ bool COfficeFileFormatChecker::isXlsFormatFile (POLE::Storage * storage)
|
||||
}
|
||||
}
|
||||
}
|
||||
#if defined FILE_FORMAT_CHECKER_WITH_MACRO
|
||||
if (storage->isDirectory(L"_VBA_PROJECT_CUR"))
|
||||
{
|
||||
bMacroEnabled = true;
|
||||
}
|
||||
#endif
|
||||
return true;
|
||||
}
|
||||
// Returns true when the buffer starts with one of the Word binary signatures
// 0xEC 0xA5, 0xDC 0xA5 or 0xDB 0xA5 (the same three pairs isDocFormatFile
// labels as Word 1997-2003, Word 1995 and Word 2.0).
//   pBuffer - start of the file contents, may be NULL
//   dwBytes - number of valid bytes in pBuffer
bool COfficeFileFormatChecker::isDocFlatFormatFile (unsigned char* pBuffer, int dwBytes)
{
	// Two bytes are inspected below; the original read them without looking at
	// dwBytes, so a 0- or 1-byte buffer was read out of bounds.
	if (pBuffer == NULL || dwBytes < 2) return false;

	// all three signatures share the second byte
	if (pBuffer[1] != 0xA5) return false;

	return (pBuffer[0] == 0xEC) || (pBuffer[0] == 0xDC) || (pBuffer[0] == 0xDB);
}
|
||||
|
||||
bool COfficeFileFormatChecker::isPptFormatFile (POLE::Storage * storage)
|
||||
{
|
||||
@ -236,8 +254,8 @@ bool COfficeFileFormatChecker::isOfficeFile(const std::wstring & fileName)
|
||||
{
|
||||
if ( isDocFormatFile(&storage) )
|
||||
{
|
||||
nFileType = AVS_OFFICESTUDIO_FILE_DOCUMENT_DOC;
|
||||
return true;
|
||||
//nFileType is assigned inside isDocFormatFile
|
||||
return true;
|
||||
}
|
||||
else if ( isXlsFormatFile(&storage) )
|
||||
{
|
||||
@ -265,7 +283,7 @@ bool COfficeFileFormatChecker::isOfficeFile(const std::wstring & fileName)
|
||||
nFileType = AVS_OFFICESTUDIO_FILE_OTHER_MS_OFFCRYPTO;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
COfficeUtils OfficeUtils(NULL);
|
||||
if (OfficeUtils.IsArchive(fileName) == S_OK)
|
||||
@ -335,6 +353,11 @@ bool COfficeFileFormatChecker::isOfficeFile(const std::wstring & fileName)
|
||||
{
|
||||
//nFileType
|
||||
}
|
||||
else if (isDocFlatFormatFile(buffer,sizeRead) )
|
||||
{
|
||||
nFileType = AVS_OFFICESTUDIO_FILE_DOCUMENT_DOC_FLAT; // without compaund container
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------------------------
|
||||
file.CloseFile();
|
||||
|
||||
|
||||
@ -49,6 +49,8 @@
|
||||
#define AVS_OFFICESTUDIO_FILE_DOCUMENT_DOTM AVS_OFFICESTUDIO_FILE_DOCUMENT + 0x000d
|
||||
#define AVS_OFFICESTUDIO_FILE_DOCUMENT_ODT_FLAT AVS_OFFICESTUDIO_FILE_DOCUMENT + 0x000e
|
||||
#define AVS_OFFICESTUDIO_FILE_DOCUMENT_OTT AVS_OFFICESTUDIO_FILE_DOCUMENT + 0x000f
|
||||
#define AVS_OFFICESTUDIO_FILE_DOCUMENT_DOC_FLAT AVS_OFFICESTUDIO_FILE_DOCUMENT + 0x0010
|
||||
#define AVS_OFFICESTUDIO_FILE_DOCUMENT_HTML_IN_CONTAINER AVS_OFFICESTUDIO_FILE_DOCUMENT + 0x0011
|
||||
|
||||
#define AVS_OFFICESTUDIO_FILE_PRESENTATION 0x0080
|
||||
#define AVS_OFFICESTUDIO_FILE_PRESENTATION_PPTX AVS_OFFICESTUDIO_FILE_PRESENTATION + 0x0001
|
||||
@ -60,7 +62,7 @@
|
||||
#define AVS_OFFICESTUDIO_FILE_PRESENTATION_POTX AVS_OFFICESTUDIO_FILE_PRESENTATION + 0x0007
|
||||
#define AVS_OFFICESTUDIO_FILE_PRESENTATION_POTM AVS_OFFICESTUDIO_FILE_PRESENTATION + 0x0008
|
||||
#define AVS_OFFICESTUDIO_FILE_PRESENTATION_ODP_FLAT AVS_OFFICESTUDIO_FILE_PRESENTATION + 0x0009
|
||||
#define AVS_OFFICESTUDIO_FILE_PRESENTATION_OTP AVS_OFFICESTUDIO_FILE_PRESENTATION + 0x0010
|
||||
#define AVS_OFFICESTUDIO_FILE_PRESENTATION_OTP AVS_OFFICESTUDIO_FILE_PRESENTATION + 0x000a
|
||||
|
||||
#define AVS_OFFICESTUDIO_FILE_SPREADSHEET 0x0100
|
||||
#define AVS_OFFICESTUDIO_FILE_SPREADSHEET_XLSX AVS_OFFICESTUDIO_FILE_SPREADSHEET + 0x0001
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
VERSION = 2.4.523.0
|
||||
VERSION = 2.4.526.0
|
||||
DEFINES += INTVER=$$VERSION
|
||||
|
||||
# CONFIGURATION
|
||||
|
||||
@ -257,7 +257,7 @@ namespace agg
|
||||
projects[3] = project(bounds.x2, bounds.y2);
|
||||
|
||||
double min = projects[0].x * projects[0].x + projects[0].y * projects[0].y;
|
||||
if (projects[0].x * m_cos + projects[0].y * m_sin)
|
||||
if ((projects[0].x * m_cos + projects[0].y * m_sin) < 0)
|
||||
min = -min;
|
||||
|
||||
double max = min;
|
||||
@ -307,6 +307,14 @@ namespace agg
|
||||
{
|
||||
if ( t < m_pPosSubColors[i] )
|
||||
{
|
||||
if (i == 1 && t < m_pPosSubColors[0])
|
||||
{
|
||||
// t is below the smallest stop position
|
||||
m_color_table[index] = m_pSubColors[0];
|
||||
bFindColor = true;
|
||||
break;
|
||||
}
|
||||
|
||||
t = (t - m_pPosSubColors[i - 1]) / (m_pPosSubColors[i] - m_pPosSubColors[i - 1]);
|
||||
m_color_table[index] = m_pSubColors[i - 1].gradient( m_pSubColors[i], t );
|
||||
|
||||
@ -437,6 +445,14 @@ namespace agg
|
||||
|
||||
for (int i = 1; i < m_nCountSubColors; ++i)
|
||||
{
|
||||
if (i == 1 && t < m_pPosSubColors[0])
|
||||
{
|
||||
// t is below the smallest stop position
|
||||
m_color_table[index] = m_pSubColors[0];
|
||||
bFindColor = true;
|
||||
break;
|
||||
}
|
||||
|
||||
if ( t < m_pPosSubColors[i] )
|
||||
{
|
||||
t = (t - m_pPosSubColors[i - 1]) / (m_pPosSubColors[i] - m_pPosSubColors[i - 1]);
|
||||
|
||||
@ -249,10 +249,10 @@
|
||||
HEADER_SEARCH_PATHS = (
|
||||
"$(inherited)",
|
||||
/usr/include/,
|
||||
"$(PROJECT_DIR)/../../../UnicodeConverter/icubuilds/mac/icu/**",
|
||||
"$(PROJECT_DIR)/../../../UnicodeConverter/icubuilds/mac/icu/icu/common",
|
||||
"$(PROJECT_DIR)/../../../UnicodeConverter/icubuilds/mac/icu/icu/i18n",
|
||||
"$(PROJECT_DIR)/../../../UnicodeConverter/icubuilds/mac/icu/icu/io",
|
||||
"$(PROJECT_DIR)/../../../UnicodeConverter/icubuilds-mac/icu/**",
|
||||
"$(PROJECT_DIR)/../../../UnicodeConverter/icubuilds-mac/icu/icu/common",
|
||||
"$(PROJECT_DIR)/../../../UnicodeConverter/icubuilds-mac/icu/icu/i18n",
|
||||
"$(PROJECT_DIR)/../../../UnicodeConverter/icubuilds-mac/icu/icu/io",
|
||||
);
|
||||
OTHER_LDFLAGS = "-ObjC";
|
||||
PRODUCT_NAME = "$(TARGET_NAME)";
|
||||
@ -278,10 +278,10 @@
|
||||
HEADER_SEARCH_PATHS = (
|
||||
"$(inherited)",
|
||||
/usr/include/,
|
||||
"$(PROJECT_DIR)/../../../UnicodeConverter/icubuilds/mac/icu/**",
|
||||
"$(PROJECT_DIR)/../../../UnicodeConverter/icubuilds/mac/icu/icu/common",
|
||||
"$(PROJECT_DIR)/../../../UnicodeConverter/icubuilds/mac/icu/icu/i18n",
|
||||
"$(PROJECT_DIR)/../../../UnicodeConverter/icubuilds/mac/icu/icu/io",
|
||||
"$(PROJECT_DIR)/../../../UnicodeConverter/icubuilds-mac/icu/**",
|
||||
"$(PROJECT_DIR)/../../../UnicodeConverter/icubuilds-mac/icu/icu/common",
|
||||
"$(PROJECT_DIR)/../../../UnicodeConverter/icubuilds-mac/icu/icu/i18n",
|
||||
"$(PROJECT_DIR)/../../../UnicodeConverter/icubuilds-mac/icu/icu/io",
|
||||
);
|
||||
OTHER_LDFLAGS = "-ObjC";
|
||||
PRODUCT_NAME = "$(TARGET_NAME)";
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,729 @@
|
||||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 1996-2015, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
*
|
||||
* FILE NAME : UTYPES.H (formerly ptypes.h)
|
||||
*
|
||||
* Date Name Description
|
||||
* 12/11/96 helena Creation.
|
||||
* 02/27/97 aliu Added typedefs for UClassID, int8, int16, int32,
|
||||
* uint8, uint16, and uint32.
|
||||
* 04/01/97 aliu Added XP_CPLUSPLUS and modified to work under C as
|
||||
* well as C++.
|
||||
* Modified to use memcpy() for uprv_arrayCopy() fns.
|
||||
* 04/14/97 aliu Added TPlatformUtilities.
|
||||
* 05/07/97 aliu Added import/export specifiers (replacing the old
|
||||
* broken EXT_CLASS). Added version number for our
|
||||
* code. Cleaned up header.
|
||||
* 6/20/97 helena Java class name change.
|
||||
* 08/11/98 stephen UErrorCode changed from typedef to enum
|
||||
* 08/12/98 erm Changed T_ANALYTIC_PACKAGE_VERSION to 3
|
||||
* 08/14/98 stephen Added uprv_arrayCopy() for int8_t, int16_t, int32_t
|
||||
* 12/09/98 jfitz Added BUFFER_OVERFLOW_ERROR (bug 1100066)
|
||||
* 04/20/99 stephen Cleaned up & reworked for autoconf.
|
||||
* Renamed to utypes.h.
|
||||
* 05/05/99 stephen Changed to use <inttypes.h>
|
||||
* 12/07/99 helena Moved copyright notice string from ucnv_bld.h here.
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef UTYPES_H
|
||||
#define UTYPES_H
|
||||
|
||||
|
||||
#include "unicode/umachine.h"
|
||||
#include "unicode/uversion.h"
|
||||
#include "unicode/uconfig.h"
|
||||
#include <float.h>
|
||||
|
||||
#if !U_NO_DEFAULT_INCLUDE_UTF_HEADERS
|
||||
# include "unicode/utf.h"
|
||||
#endif
|
||||
|
||||
/*!
|
||||
* \file
|
||||
* \brief Basic definitions for ICU, for both C and C++ APIs
|
||||
*
|
||||
* This file defines basic types, constants, and enumerations directly or
|
||||
* indirectly by including other header files, especially utf.h for the
|
||||
* basic character and string definitions and umachine.h for consistent
|
||||
* integer and other types.
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* \def U_SHOW_CPLUSPLUS_API
|
||||
* @internal
|
||||
*/
|
||||
#ifdef __cplusplus
|
||||
# ifndef U_SHOW_CPLUSPLUS_API
|
||||
# define U_SHOW_CPLUSPLUS_API 1
|
||||
# endif
|
||||
#else
|
||||
# undef U_SHOW_CPLUSPLUS_API
|
||||
# define U_SHOW_CPLUSPLUS_API 0
|
||||
#endif
|
||||
|
||||
/** @{ API visibility control */
|
||||
|
||||
/**
|
||||
* \def U_HIDE_DRAFT_API
|
||||
* Define this to 1 to request that draft API be "hidden"
|
||||
* @internal
|
||||
*/
|
||||
/**
|
||||
* \def U_HIDE_INTERNAL_API
|
||||
* Define this to 1 to request that internal API be "hidden"
|
||||
* @internal
|
||||
*/
|
||||
#if !U_DEFAULT_SHOW_DRAFT && !defined(U_SHOW_DRAFT_API)
|
||||
#define U_HIDE_DRAFT_API 1
|
||||
#endif
|
||||
#if !U_DEFAULT_SHOW_DRAFT && !defined(U_SHOW_INTERNAL_API)
|
||||
#define U_HIDE_INTERNAL_API 1
|
||||
#endif
|
||||
|
||||
/** @} */
|
||||
|
||||
/*===========================================================================*/
|
||||
/* ICUDATA naming scheme */
|
||||
/*===========================================================================*/
|
||||
|
||||
/**
|
||||
* \def U_ICUDATA_TYPE_LETTER
|
||||
*
|
||||
* This is a platform-dependent string containing one letter:
|
||||
* - b for big-endian, ASCII-family platforms
|
||||
* - l for little-endian, ASCII-family platforms
|
||||
* - e for big-endian, EBCDIC-family platforms
|
||||
* This letter is part of the common data file name.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
|
||||
/**
|
||||
* \def U_ICUDATA_TYPE_LITLETTER
|
||||
* The non-string form of U_ICUDATA_TYPE_LETTER
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#if U_CHARSET_FAMILY
|
||||
# if U_IS_BIG_ENDIAN
|
||||
/* EBCDIC - should always be BE */
|
||||
# define U_ICUDATA_TYPE_LETTER "e"
|
||||
# define U_ICUDATA_TYPE_LITLETTER e
|
||||
# else
|
||||
# error "Don't know what to do with little endian EBCDIC!"
|
||||
# define U_ICUDATA_TYPE_LETTER "x"
|
||||
# define U_ICUDATA_TYPE_LITLETTER x
|
||||
# endif
|
||||
#else
|
||||
# if U_IS_BIG_ENDIAN
|
||||
/* Big-endian ASCII */
|
||||
# define U_ICUDATA_TYPE_LETTER "b"
|
||||
# define U_ICUDATA_TYPE_LITLETTER b
|
||||
# else
|
||||
/* Little-endian ASCII */
|
||||
# define U_ICUDATA_TYPE_LETTER "l"
|
||||
# define U_ICUDATA_TYPE_LITLETTER l
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/**
|
||||
* A single string literal containing the icudata stub name. i.e. 'icudt18e' for
|
||||
* ICU 1.8.x on EBCDIC, etc..
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#define U_ICUDATA_NAME "icudt" U_ICU_VERSION_SHORT U_ICUDATA_TYPE_LETTER
|
||||
#ifndef U_HIDE_INTERNAL_API
|
||||
#define U_USRDATA_NAME "usrdt" U_ICU_VERSION_SHORT U_ICUDATA_TYPE_LETTER /**< @internal */
|
||||
#define U_USE_USRDATA 1 /**< @internal */
|
||||
#endif /* U_HIDE_INTERNAL_API */
|
||||
|
||||
/**
|
||||
* U_ICU_ENTRY_POINT is the name of the DLL entry point to the ICU data library.
|
||||
* Defined as a literal, not a string.
|
||||
* Tricky Preprocessor use - ## operator replaces macro parameters with the literal string
|
||||
* from the corresponding macro invocation, _before_ other macro substitutions.
|
||||
* Need a nested \#defines to get the actual version numbers rather than
|
||||
* the literal text U_ICU_VERSION_MAJOR_NUM into the name.
|
||||
* The net result will be something of the form
|
||||
* \#define U_ICU_ENTRY_POINT icudt19_dat
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U_ICUDATA_ENTRY_POINT U_DEF2_ICUDATA_ENTRY_POINT(U_ICU_VERSION_MAJOR_NUM,U_LIB_SUFFIX_C_NAME)
|
||||
|
||||
#ifndef U_HIDE_INTERNAL_API
|
||||
/**
|
||||
* Do not use. Note that it's OK for the 2nd argument to be undefined (literal).
|
||||
* @internal
|
||||
*/
|
||||
#define U_DEF2_ICUDATA_ENTRY_POINT(major,suff) U_DEF_ICUDATA_ENTRY_POINT(major,suff)
|
||||
|
||||
/**
|
||||
* Do not use.
|
||||
* @internal
|
||||
*/
|
||||
#ifndef U_DEF_ICUDATA_ENTRY_POINT
|
||||
/* affected by symbol renaming. See platform.h */
|
||||
#ifndef U_LIB_SUFFIX_C_NAME
|
||||
#define U_DEF_ICUDATA_ENTRY_POINT(major, suff) icudt##major##_dat
|
||||
#else
|
||||
#define U_DEF_ICUDATA_ENTRY_POINT(major, suff) icudt##suff ## major##_dat
|
||||
#endif
|
||||
#endif
|
||||
#endif /* U_HIDE_INTERNAL_API */
|
||||
|
||||
/**
|
||||
* \def NULL
|
||||
* Define NULL if necessary, to 0 for C++ and to ((void *)0) for C.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#ifndef NULL
|
||||
#ifdef __cplusplus
|
||||
#define NULL 0
|
||||
#else
|
||||
#define NULL ((void *)0)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/*===========================================================================*/
|
||||
/* Calendar/TimeZone data types */
|
||||
/*===========================================================================*/
|
||||
|
||||
/**
|
||||
* Date and Time data type.
|
||||
* This is a primitive data type that holds the date and time
|
||||
* as the number of milliseconds since 1970-jan-01, 00:00 UTC.
|
||||
* UTC leap seconds are ignored.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
typedef double UDate;
|
||||
|
||||
/** The number of milliseconds per second @stable ICU 2.0 */
|
||||
#define U_MILLIS_PER_SECOND (1000)
|
||||
/** The number of milliseconds per minute @stable ICU 2.0 */
|
||||
#define U_MILLIS_PER_MINUTE (60000)
|
||||
/** The number of milliseconds per hour @stable ICU 2.0 */
|
||||
#define U_MILLIS_PER_HOUR (3600000)
|
||||
/** The number of milliseconds per day @stable ICU 2.0 */
|
||||
#define U_MILLIS_PER_DAY (86400000)
|
||||
|
||||
/**
|
||||
* Maximum UDate value
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
#define U_DATE_MAX DBL_MAX
|
||||
|
||||
/**
|
||||
* Minimum UDate value
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
#define U_DATE_MIN -U_DATE_MAX
|
||||
|
||||
/*===========================================================================*/
|
||||
/* Shared library/DLL import-export API control */
|
||||
/*===========================================================================*/
|
||||
|
||||
/*
|
||||
* Control of symbol import/export.
|
||||
* ICU is separated into three libraries.
|
||||
*/
|
||||
|
||||
/**
|
||||
* \def U_COMBINED_IMPLEMENTATION
|
||||
* Set to export library symbols from inside the ICU library
|
||||
* when all of ICU is in a single library.
|
||||
* This can be set as a compiler option while building ICU, and it
|
||||
* needs to be the first one tested to override U_COMMON_API, U_I18N_API, etc.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
|
||||
/**
|
||||
* \def U_DATA_API
|
||||
* Set to export library symbols from inside the stubdata library,
|
||||
* and to import them from outside.
|
||||
* @stable ICU 3.0
|
||||
*/
|
||||
|
||||
/**
|
||||
* \def U_COMMON_API
|
||||
* Set to export library symbols from inside the common library,
|
||||
* and to import them from outside.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
|
||||
/**
|
||||
* \def U_I18N_API
|
||||
* Set to export library symbols from inside the i18n library,
|
||||
* and to import them from outside.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
|
||||
/**
|
||||
* \def U_LAYOUT_API
|
||||
* Set to export library symbols from inside the layout engine library,
|
||||
* and to import them from outside.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
|
||||
/**
|
||||
* \def U_LAYOUTEX_API
|
||||
* Set to export library symbols from inside the layout extensions library,
|
||||
* and to import them from outside.
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
|
||||
/**
|
||||
* \def U_IO_API
|
||||
* Set to export library symbols from inside the ustdio library,
|
||||
* and to import them from outside.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
|
||||
/**
|
||||
* \def U_TOOLUTIL_API
|
||||
* Set to export library symbols from inside the toolutil library,
|
||||
* and to import them from outside.
|
||||
* @stable ICU 3.4
|
||||
*/
|
||||
|
||||
#if defined(U_COMBINED_IMPLEMENTATION)
|
||||
#define U_DATA_API U_EXPORT
|
||||
#define U_COMMON_API U_EXPORT
|
||||
#define U_I18N_API U_EXPORT
|
||||
#define U_LAYOUT_API U_EXPORT
|
||||
#define U_LAYOUTEX_API U_EXPORT
|
||||
#define U_IO_API U_EXPORT
|
||||
#define U_TOOLUTIL_API U_EXPORT
|
||||
#elif defined(U_STATIC_IMPLEMENTATION)
|
||||
#define U_DATA_API
|
||||
#define U_COMMON_API
|
||||
#define U_I18N_API
|
||||
#define U_LAYOUT_API
|
||||
#define U_LAYOUTEX_API
|
||||
#define U_IO_API
|
||||
#define U_TOOLUTIL_API
|
||||
#elif defined(U_COMMON_IMPLEMENTATION)
|
||||
#define U_DATA_API U_IMPORT
|
||||
#define U_COMMON_API U_EXPORT
|
||||
#define U_I18N_API U_IMPORT
|
||||
#define U_LAYOUT_API U_IMPORT
|
||||
#define U_LAYOUTEX_API U_IMPORT
|
||||
#define U_IO_API U_IMPORT
|
||||
#define U_TOOLUTIL_API U_IMPORT
|
||||
#elif defined(U_I18N_IMPLEMENTATION)
|
||||
#define U_DATA_API U_IMPORT
|
||||
#define U_COMMON_API U_IMPORT
|
||||
#define U_I18N_API U_EXPORT
|
||||
#define U_LAYOUT_API U_IMPORT
|
||||
#define U_LAYOUTEX_API U_IMPORT
|
||||
#define U_IO_API U_IMPORT
|
||||
#define U_TOOLUTIL_API U_IMPORT
|
||||
#elif defined(U_LAYOUT_IMPLEMENTATION)
|
||||
#define U_DATA_API U_IMPORT
|
||||
#define U_COMMON_API U_IMPORT
|
||||
#define U_I18N_API U_IMPORT
|
||||
#define U_LAYOUT_API U_EXPORT
|
||||
#define U_LAYOUTEX_API U_IMPORT
|
||||
#define U_IO_API U_IMPORT
|
||||
#define U_TOOLUTIL_API U_IMPORT
|
||||
#elif defined(U_LAYOUTEX_IMPLEMENTATION)
|
||||
#define U_DATA_API U_IMPORT
|
||||
#define U_COMMON_API U_IMPORT
|
||||
#define U_I18N_API U_IMPORT
|
||||
#define U_LAYOUT_API U_IMPORT
|
||||
#define U_LAYOUTEX_API U_EXPORT
|
||||
#define U_IO_API U_IMPORT
|
||||
#define U_TOOLUTIL_API U_IMPORT
|
||||
#elif defined(U_IO_IMPLEMENTATION)
|
||||
#define U_DATA_API U_IMPORT
|
||||
#define U_COMMON_API U_IMPORT
|
||||
#define U_I18N_API U_IMPORT
|
||||
#define U_LAYOUT_API U_IMPORT
|
||||
#define U_LAYOUTEX_API U_IMPORT
|
||||
#define U_IO_API U_EXPORT
|
||||
#define U_TOOLUTIL_API U_IMPORT
|
||||
#elif defined(U_TOOLUTIL_IMPLEMENTATION)
|
||||
#define U_DATA_API U_IMPORT
|
||||
#define U_COMMON_API U_IMPORT
|
||||
#define U_I18N_API U_IMPORT
|
||||
#define U_LAYOUT_API U_IMPORT
|
||||
#define U_LAYOUTEX_API U_IMPORT
|
||||
#define U_IO_API U_IMPORT
|
||||
#define U_TOOLUTIL_API U_EXPORT
|
||||
#else
|
||||
#define U_DATA_API U_IMPORT
|
||||
#define U_COMMON_API U_IMPORT
|
||||
#define U_I18N_API U_IMPORT
|
||||
#define U_LAYOUT_API U_IMPORT
|
||||
#define U_LAYOUTEX_API U_IMPORT
|
||||
#define U_IO_API U_IMPORT
|
||||
#define U_TOOLUTIL_API U_IMPORT
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_STANDARD_CPP_NAMESPACE
|
||||
* Control of C++ Namespace
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#ifdef __cplusplus
|
||||
#define U_STANDARD_CPP_NAMESPACE ::
|
||||
#else
|
||||
#define U_STANDARD_CPP_NAMESPACE
|
||||
#endif
|
||||
|
||||
|
||||
/*===========================================================================*/
|
||||
/* Global delete operator */
|
||||
/*===========================================================================*/
|
||||
|
||||
/*
|
||||
* The ICU4C library must not use the global new and delete operators.
|
||||
* These operators here are defined to enable testing for this.
|
||||
* See Jitterbug 2581 for details of why this is necessary.
|
||||
*
|
||||
* Verification that ICU4C's memory usage is correct, i.e.,
|
||||
* that global new/delete are not used:
|
||||
*
|
||||
* a) Check for imports of global new/delete (see uobject.cpp for details)
|
||||
* b) Verify that new is never imported.
|
||||
* c) Verify that delete is only imported from object code for interface/mixin classes.
|
||||
* d) Add global delete and delete[] only for the ICU4C library itself
|
||||
* and define them in a way that crashes or otherwise easily shows a problem.
|
||||
*
|
||||
* The following implements d).
|
||||
* The operator implementations crash; this is intentional and used for library debugging.
|
||||
*
|
||||
* Note: This is currently only done on Windows because
|
||||
* some Linux/Unix compilers have problems with defining global new/delete.
|
||||
* On Windows, it is _MSC_VER>=1200 for MSVC 6.0 and higher.
|
||||
*/
|
||||
#if defined(__cplusplus) && U_DEBUG && U_OVERRIDE_CXX_ALLOCATION && (_MSC_VER>=1200) && !defined(U_STATIC_IMPLEMENTATION) && (defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION) || defined(U_LAYOUT_IMPLEMENTATION) || defined(U_LAYOUTEX_IMPLEMENTATION))
|
||||
|
||||
#ifndef U_HIDE_INTERNAL_API
|
||||
/**
|
||||
* Global operator new, defined only inside ICU4C, must not be used.
|
||||
* Crashes intentionally.
|
||||
* @internal
|
||||
*/
|
||||
inline void *
|
||||
operator new(size_t /*size*/) {
|
||||
/* q is deliberately null: the store through it below is what makes this operator crash. */
char *q=NULL;
|
||||
*q=5; /* break it */
|
||||
/* Follows the null store; no memory is ever allocated by this operator. */
return q;
|
||||
}
|
||||
|
||||
#ifdef _Ret_bytecap_
|
||||
/* This is only needed to suppress a Visual C++ 2008 warning for operator new[]. */
|
||||
_Ret_bytecap_(_Size)
|
||||
#endif
|
||||
/**
|
||||
* Global operator new[], defined only inside ICU4C, must not be used.
|
||||
* Crashes intentionally.
|
||||
* @internal
|
||||
*/
|
||||
inline void *
|
||||
operator new[](size_t /*size*/) {
|
||||
/* q is deliberately null: the store through it below is what makes this operator crash. */
char *q=NULL;
|
||||
*q=5; /* break it */
|
||||
/* Follows the null store; no memory is ever allocated by this operator. */
return q;
|
||||
}
|
||||
|
||||
/**
|
||||
* Global operator delete, defined only inside ICU4C, must not be used.
|
||||
* Crashes intentionally.
|
||||
* @internal
|
||||
*/
|
||||
inline void
|
||||
operator delete(void * /*p*/) {
|
||||
/* The argument is ignored; q is deliberately null so the store below crashes. */
char *q=NULL;
|
||||
*q=5; /* break it */
|
||||
}
|
||||
|
||||
/**
|
||||
* Global operator delete[], defined only inside ICU4C, must not be used.
|
||||
* Crashes intentionally.
|
||||
* @internal
|
||||
*/
|
||||
inline void
|
||||
operator delete[](void * /*p*/) {
|
||||
/* The argument is ignored; q is deliberately null so the store below crashes. */
char *q=NULL;
|
||||
*q=5; /* break it */
|
||||
}
|
||||
|
||||
#endif /* U_HIDE_INTERNAL_API */
|
||||
#endif
|
||||
|
||||
/*===========================================================================*/
|
||||
/* UErrorCode */
|
||||
/*===========================================================================*/
|
||||
|
||||
/**
|
||||
* Error code to replace exception handling, so that the code is compatible with all C++ compilers,
|
||||
* and to use the same mechanism for C and C++.
|
||||
*
|
||||
* \par
|
||||
* ICU functions that take a reference (C++) or a pointer (C) to a UErrorCode
|
||||
* first test if(U_FAILURE(errorCode)) { return immediately; }
|
||||
* so that in a chain of such functions the first one that sets an error code
|
||||
* causes the following ones to not perform any operations.
|
||||
*
|
||||
* \par
|
||||
* Error codes should be tested using U_FAILURE() and U_SUCCESS().
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
typedef enum UErrorCode {
|
||||
/* The ordering of U_ERROR_INFO_START Vs U_USING_FALLBACK_WARNING looks weird
|
||||
* and is that way because VC++ debugger displays first encountered constant,
|
||||
* which is not what the code is used for
|
||||
*/
|
||||
|
||||
U_USING_FALLBACK_WARNING = -128, /**< A resource bundle lookup returned a fallback result (not an error) */
|
||||
|
||||
U_ERROR_WARNING_START = -128, /**< Start of information results (semantically successful) */
|
||||
|
||||
U_USING_DEFAULT_WARNING = -127, /**< A resource bundle lookup returned a result from the root locale (not an error) */
|
||||
|
||||
U_SAFECLONE_ALLOCATED_WARNING = -126, /**< A SafeClone operation required allocating memory (informational only) */
|
||||
|
||||
U_STATE_OLD_WARNING = -125, /**< ICU has to use compatibility layer to construct the service. Expect performance/memory usage degradation. Consider upgrading */
|
||||
|
||||
U_STRING_NOT_TERMINATED_WARNING = -124,/**< An output string could not be NUL-terminated because output length==destCapacity. */
|
||||
|
||||
U_SORT_KEY_TOO_SHORT_WARNING = -123, /**< Number of levels requested in getBound is higher than the number of levels in the sort key */
|
||||
|
||||
U_AMBIGUOUS_ALIAS_WARNING = -122, /**< This converter alias can go to different converter implementations */
|
||||
|
||||
U_DIFFERENT_UCA_VERSION = -121, /**< ucol_open encountered a mismatch between UCA version and collator image version, so the collator was constructed from rules. No impact to further function */
|
||||
|
||||
U_PLUGIN_CHANGED_LEVEL_WARNING = -120, /**< A plugin caused a level change. May not be an error, but later plugins may not load. */
|
||||
|
||||
U_ERROR_WARNING_LIMIT, /**< This must always be the last warning value to indicate the limit for UErrorCode warnings (last warning code +1) */
|
||||
|
||||
|
||||
U_ZERO_ERROR = 0, /**< No error, no warning. */
|
||||
|
||||
U_ILLEGAL_ARGUMENT_ERROR = 1, /**< Start of codes indicating failure */
|
||||
U_MISSING_RESOURCE_ERROR = 2, /**< The requested resource cannot be found */
|
||||
U_INVALID_FORMAT_ERROR = 3, /**< Data format is not what is expected */
|
||||
U_FILE_ACCESS_ERROR = 4, /**< The requested file cannot be found */
|
||||
U_INTERNAL_PROGRAM_ERROR = 5, /**< Indicates a bug in the library code */
|
||||
U_MESSAGE_PARSE_ERROR = 6, /**< Unable to parse a message (message format) */
|
||||
U_MEMORY_ALLOCATION_ERROR = 7, /**< Memory allocation error */
|
||||
U_INDEX_OUTOFBOUNDS_ERROR = 8, /**< Trying to access the index that is out of bounds */
|
||||
U_PARSE_ERROR = 9, /**< Equivalent to Java ParseException */
|
||||
U_INVALID_CHAR_FOUND = 10, /**< Character conversion: Unmappable input sequence. In other APIs: Invalid character. */
|
||||
U_TRUNCATED_CHAR_FOUND = 11, /**< Character conversion: Incomplete input sequence. */
|
||||
U_ILLEGAL_CHAR_FOUND = 12, /**< Character conversion: Illegal input sequence/combination of input units. */
|
||||
U_INVALID_TABLE_FORMAT = 13, /**< Conversion table file found, but corrupted */
|
||||
U_INVALID_TABLE_FILE = 14, /**< Conversion table file not found */
|
||||
U_BUFFER_OVERFLOW_ERROR = 15, /**< A result would not fit in the supplied buffer */
|
||||
U_UNSUPPORTED_ERROR = 16, /**< Requested operation not supported in current context */
|
||||
U_RESOURCE_TYPE_MISMATCH = 17, /**< an operation is requested over a resource that does not support it */
|
||||
U_ILLEGAL_ESCAPE_SEQUENCE = 18, /**< ISO-2022 illlegal escape sequence */
|
||||
U_UNSUPPORTED_ESCAPE_SEQUENCE = 19, /**< ISO-2022 unsupported escape sequence */
|
||||
U_NO_SPACE_AVAILABLE = 20, /**< No space available for in-buffer expansion for Arabic shaping */
|
||||
U_CE_NOT_FOUND_ERROR = 21, /**< Currently used only while setting variable top, but can be used generally */
|
||||
U_PRIMARY_TOO_LONG_ERROR = 22, /**< User tried to set variable top to a primary that is longer than two bytes */
|
||||
U_STATE_TOO_OLD_ERROR = 23, /**< ICU cannot construct a service from this state, as it is no longer supported */
|
||||
U_TOO_MANY_ALIASES_ERROR = 24, /**< There are too many aliases in the path to the requested resource.
|
||||
It is very possible that a circular alias definition has occurred */
|
||||
U_ENUM_OUT_OF_SYNC_ERROR = 25, /**< UEnumeration out of sync with underlying collection */
|
||||
U_INVARIANT_CONVERSION_ERROR = 26, /**< Unable to convert a UChar* string to char* with the invariant converter. */
|
||||
U_INVALID_STATE_ERROR = 27, /**< Requested operation can not be completed with ICU in its current state */
|
||||
U_COLLATOR_VERSION_MISMATCH = 28, /**< Collator version is not compatible with the base version */
|
||||
U_USELESS_COLLATOR_ERROR = 29, /**< Collator is options only and no base is specified */
|
||||
U_NO_WRITE_PERMISSION = 30, /**< Attempt to modify read-only or constant data. */
|
||||
|
||||
U_STANDARD_ERROR_LIMIT, /**< This must always be the last value to indicate the limit for standard errors */
|
||||
/*
|
||||
* the error code range 0x10000 0x10100 are reserved for Transliterator
|
||||
*/
|
||||
U_BAD_VARIABLE_DEFINITION=0x10000,/**< Missing '$' or duplicate variable name */
|
||||
U_PARSE_ERROR_START = 0x10000, /**< Start of Transliterator errors */
|
||||
U_MALFORMED_RULE, /**< Elements of a rule are misplaced */
|
||||
U_MALFORMED_SET, /**< A UnicodeSet pattern is invalid*/
|
||||
U_MALFORMED_SYMBOL_REFERENCE, /**< UNUSED as of ICU 2.4 */
|
||||
U_MALFORMED_UNICODE_ESCAPE, /**< A Unicode escape pattern is invalid*/
|
||||
U_MALFORMED_VARIABLE_DEFINITION, /**< A variable definition is invalid */
|
||||
U_MALFORMED_VARIABLE_REFERENCE, /**< A variable reference is invalid */
|
||||
U_MISMATCHED_SEGMENT_DELIMITERS, /**< UNUSED as of ICU 2.4 */
|
||||
U_MISPLACED_ANCHOR_START, /**< A start anchor appears at an illegal position */
|
||||
U_MISPLACED_CURSOR_OFFSET, /**< A cursor offset occurs at an illegal position */
|
||||
U_MISPLACED_QUANTIFIER, /**< A quantifier appears after a segment close delimiter */
|
||||
U_MISSING_OPERATOR, /**< A rule contains no operator */
|
||||
U_MISSING_SEGMENT_CLOSE, /**< UNUSED as of ICU 2.4 */
|
||||
U_MULTIPLE_ANTE_CONTEXTS, /**< More than one ante context */
|
||||
U_MULTIPLE_CURSORS, /**< More than one cursor */
|
||||
U_MULTIPLE_POST_CONTEXTS, /**< More than one post context */
|
||||
U_TRAILING_BACKSLASH, /**< A dangling backslash */
|
||||
U_UNDEFINED_SEGMENT_REFERENCE, /**< A segment reference does not correspond to a defined segment */
|
||||
U_UNDEFINED_VARIABLE, /**< A variable reference does not correspond to a defined variable */
|
||||
U_UNQUOTED_SPECIAL, /**< A special character was not quoted or escaped */
|
||||
U_UNTERMINATED_QUOTE, /**< A closing single quote is missing */
|
||||
U_RULE_MASK_ERROR, /**< A rule is hidden by an earlier more general rule */
|
||||
U_MISPLACED_COMPOUND_FILTER, /**< A compound filter is in an invalid location */
|
||||
U_MULTIPLE_COMPOUND_FILTERS, /**< More than one compound filter */
|
||||
U_INVALID_RBT_SYNTAX, /**< A "::id" rule was passed to the RuleBasedTransliterator parser */
|
||||
U_INVALID_PROPERTY_PATTERN, /**< UNUSED as of ICU 2.4 */
|
||||
U_MALFORMED_PRAGMA, /**< A 'use' pragma is invlalid */
|
||||
U_UNCLOSED_SEGMENT, /**< A closing ')' is missing */
|
||||
U_ILLEGAL_CHAR_IN_SEGMENT, /**< UNUSED as of ICU 2.4 */
|
||||
U_VARIABLE_RANGE_EXHAUSTED, /**< Too many stand-ins generated for the given variable range */
|
||||
U_VARIABLE_RANGE_OVERLAP, /**< The variable range overlaps characters used in rules */
|
||||
U_ILLEGAL_CHARACTER, /**< A special character is outside its allowed context */
|
||||
U_INTERNAL_TRANSLITERATOR_ERROR, /**< Internal transliterator system error */
|
||||
U_INVALID_ID, /**< A "::id" rule specifies an unknown transliterator */
|
||||
U_INVALID_FUNCTION, /**< A "&fn()" rule specifies an unknown transliterator */
|
||||
U_PARSE_ERROR_LIMIT, /**< The limit for Transliterator errors */
|
||||
|
||||
/*
|
||||
* the error code range 0x10100 0x10200 are reserved for formatting API parsing error
|
||||
*/
|
||||
U_UNEXPECTED_TOKEN=0x10100, /**< Syntax error in format pattern */
|
||||
U_FMT_PARSE_ERROR_START=0x10100, /**< Start of format library errors */
|
||||
U_MULTIPLE_DECIMAL_SEPARATORS, /**< More than one decimal separator in number pattern */
|
||||
U_MULTIPLE_DECIMAL_SEPERATORS = U_MULTIPLE_DECIMAL_SEPARATORS, /**< Typo: kept for backward compatibility. Use U_MULTIPLE_DECIMAL_SEPARATORS */
|
||||
U_MULTIPLE_EXPONENTIAL_SYMBOLS, /**< More than one exponent symbol in number pattern */
|
||||
U_MALFORMED_EXPONENTIAL_PATTERN, /**< Grouping symbol in exponent pattern */
|
||||
U_MULTIPLE_PERCENT_SYMBOLS, /**< More than one percent symbol in number pattern */
|
||||
U_MULTIPLE_PERMILL_SYMBOLS, /**< More than one permill symbol in number pattern */
|
||||
U_MULTIPLE_PAD_SPECIFIERS, /**< More than one pad symbol in number pattern */
|
||||
U_PATTERN_SYNTAX_ERROR, /**< Syntax error in format pattern */
|
||||
U_ILLEGAL_PAD_POSITION, /**< Pad symbol misplaced in number pattern */
|
||||
U_UNMATCHED_BRACES, /**< Braces do not match in message pattern */
|
||||
U_UNSUPPORTED_PROPERTY, /**< UNUSED as of ICU 2.4 */
|
||||
U_UNSUPPORTED_ATTRIBUTE, /**< UNUSED as of ICU 2.4 */
|
||||
U_ARGUMENT_TYPE_MISMATCH, /**< Argument name and argument index mismatch in MessageFormat functions */
|
||||
U_DUPLICATE_KEYWORD, /**< Duplicate keyword in PluralFormat */
|
||||
U_UNDEFINED_KEYWORD, /**< Undefined Plural keyword */
|
||||
U_DEFAULT_KEYWORD_MISSING, /**< Missing DEFAULT rule in plural rules */
|
||||
U_DECIMAL_NUMBER_SYNTAX_ERROR, /**< Decimal number syntax error */
|
||||
U_FORMAT_INEXACT_ERROR, /**< Cannot format a number exactly and rounding mode is ROUND_UNNECESSARY @stable ICU 4.8 */
|
||||
U_FMT_PARSE_ERROR_LIMIT, /**< The limit for format library errors */
|
||||
|
||||
/*
|
||||
* the error code range 0x10200 0x102ff are reserved for Break Iterator related error
|
||||
*/
|
||||
U_BRK_INTERNAL_ERROR=0x10200, /**< An internal error (bug) was detected. */
|
||||
U_BRK_ERROR_START=0x10200, /**< Start of codes indicating Break Iterator failures */
|
||||
U_BRK_HEX_DIGITS_EXPECTED, /**< Hex digits expected as part of a escaped char in a rule. */
|
||||
U_BRK_SEMICOLON_EXPECTED, /**< Missing ';' at the end of a RBBI rule. */
|
||||
U_BRK_RULE_SYNTAX, /**< Syntax error in RBBI rule. */
|
||||
U_BRK_UNCLOSED_SET, /**< UnicodeSet witing an RBBI rule missing a closing ']'. */
|
||||
U_BRK_ASSIGN_ERROR, /**< Syntax error in RBBI rule assignment statement. */
|
||||
U_BRK_VARIABLE_REDFINITION, /**< RBBI rule $Variable redefined. */
|
||||
U_BRK_MISMATCHED_PAREN, /**< Mis-matched parentheses in an RBBI rule. */
|
||||
U_BRK_NEW_LINE_IN_QUOTED_STRING, /**< Missing closing quote in an RBBI rule. */
|
||||
U_BRK_UNDEFINED_VARIABLE, /**< Use of an undefined $Variable in an RBBI rule. */
|
||||
U_BRK_INIT_ERROR, /**< Initialization failure. Probable missing ICU Data. */
|
||||
U_BRK_RULE_EMPTY_SET, /**< Rule contains an empty Unicode Set. */
|
||||
U_BRK_UNRECOGNIZED_OPTION, /**< !!option in RBBI rules not recognized. */
|
||||
U_BRK_MALFORMED_RULE_TAG, /**< The {nnn} tag on a rule is mal formed */
|
||||
U_BRK_ERROR_LIMIT, /**< This must always be the last value to indicate the limit for Break Iterator failures */
|
||||
|
||||
/*
|
||||
* The error codes in the range 0x10300-0x103ff are reserved for regular expression related errors
|
||||
*/
|
||||
U_REGEX_INTERNAL_ERROR=0x10300, /**< An internal error (bug) was detected. */
|
||||
U_REGEX_ERROR_START=0x10300, /**< Start of codes indicating Regexp failures */
|
||||
U_REGEX_RULE_SYNTAX, /**< Syntax error in regexp pattern. */
|
||||
U_REGEX_INVALID_STATE, /**< RegexMatcher in invalid state for requested operation */
|
||||
U_REGEX_BAD_ESCAPE_SEQUENCE, /**< Unrecognized backslash escape sequence in pattern */
|
||||
U_REGEX_PROPERTY_SYNTAX, /**< Incorrect Unicode property */
|
||||
U_REGEX_UNIMPLEMENTED, /**< Use of regexp feature that is not yet implemented. */
|
||||
U_REGEX_MISMATCHED_PAREN, /**< Incorrectly nested parentheses in regexp pattern. */
|
||||
U_REGEX_NUMBER_TOO_BIG, /**< Decimal number is too large. */
|
||||
U_REGEX_BAD_INTERVAL, /**< Error in {min,max} interval */
|
||||
U_REGEX_MAX_LT_MIN, /**< In {min,max}, max is less than min. */
|
||||
U_REGEX_INVALID_BACK_REF, /**< Back-reference to a non-existent capture group. */
|
||||
U_REGEX_INVALID_FLAG, /**< Invalid value for match mode flags. */
|
||||
U_REGEX_LOOK_BEHIND_LIMIT, /**< Look-Behind pattern matches must have a bounded maximum length. */
|
||||
U_REGEX_SET_CONTAINS_STRING, /**< Regexps cannot have UnicodeSets containing strings.*/
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
U_REGEX_OCTAL_TOO_BIG, /**< Octal character constants must be <= 0377. @deprecated ICU 54. This error cannot occur. */
|
||||
#endif /* U_HIDE_DEPRECATED_API */
|
||||
U_REGEX_MISSING_CLOSE_BRACKET=U_REGEX_SET_CONTAINS_STRING+2, /**< Missing closing bracket on a bracket expression. */
|
||||
U_REGEX_INVALID_RANGE, /**< In a character range [x-y], x is greater than y. */
|
||||
U_REGEX_STACK_OVERFLOW, /**< Regular expression backtrack stack overflow. */
|
||||
U_REGEX_TIME_OUT, /**< Maximum allowed match time exceeded */
|
||||
U_REGEX_STOPPED_BY_CALLER, /**< Matching operation aborted by user callback fn. */
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
U_REGEX_PATTERN_TOO_BIG, /**< Pattern exceeds limits on size or complexity. @draft ICU 55 */
|
||||
U_REGEX_INVALID_CAPTURE_GROUP_NAME, /**< Invalid capture group name. @draft ICU 55 */
|
||||
#endif /* U_HIDE_DRAFT_API */
|
||||
U_REGEX_ERROR_LIMIT=U_REGEX_STOPPED_BY_CALLER+3, /**< This must always be the last value to indicate the limit for regexp errors */
|
||||
|
||||
/*
|
||||
* The error codes in the range 0x10400-0x104ff are reserved for IDNA related error codes
|
||||
*/
|
||||
U_IDNA_PROHIBITED_ERROR=0x10400,
|
||||
U_IDNA_ERROR_START=0x10400,
|
||||
U_IDNA_UNASSIGNED_ERROR,
|
||||
U_IDNA_CHECK_BIDI_ERROR,
|
||||
U_IDNA_STD3_ASCII_RULES_ERROR,
|
||||
U_IDNA_ACE_PREFIX_ERROR,
|
||||
U_IDNA_VERIFICATION_ERROR,
|
||||
U_IDNA_LABEL_TOO_LONG_ERROR,
|
||||
U_IDNA_ZERO_LENGTH_LABEL_ERROR,
|
||||
U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR,
|
||||
U_IDNA_ERROR_LIMIT,
|
||||
/*
|
||||
* Aliases for StringPrep
|
||||
*/
|
||||
U_STRINGPREP_PROHIBITED_ERROR = U_IDNA_PROHIBITED_ERROR,
|
||||
U_STRINGPREP_UNASSIGNED_ERROR = U_IDNA_UNASSIGNED_ERROR,
|
||||
U_STRINGPREP_CHECK_BIDI_ERROR = U_IDNA_CHECK_BIDI_ERROR,
|
||||
|
||||
/*
|
||||
* The error codes in the range 0x10500-0x105ff are reserved for Plugin related error codes
|
||||
*/
|
||||
U_PLUGIN_ERROR_START=0x10500, /**< Start of codes indicating plugin failures */
|
||||
U_PLUGIN_TOO_HIGH=0x10500, /**< The plugin's level is too high to be loaded right now. */
|
||||
U_PLUGIN_DIDNT_SET_LEVEL, /**< The plugin didn't call uplug_setPlugLevel in response to a QUERY */
|
||||
U_PLUGIN_ERROR_LIMIT, /**< This must always be the last value to indicate the limit for plugin errors */
|
||||
|
||||
U_ERROR_LIMIT=U_PLUGIN_ERROR_LIMIT /**< This must always be the last value to indicate the limit for UErrorCode (last error code +1) */
|
||||
} UErrorCode;
|
||||
|
||||
/* Use the following to determine if an UErrorCode represents */
|
||||
/* operational success or failure. */
|
||||
|
||||
#ifdef __cplusplus
|
||||
/**
|
||||
* Does the error code indicate success?
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
static
|
||||
inline UBool U_SUCCESS(UErrorCode code) { return (UBool)(code<=U_ZERO_ERROR); }
|
||||
/**
|
||||
* Does the error code indicate a failure?
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
static
|
||||
inline UBool U_FAILURE(UErrorCode code) { return (UBool)(code>U_ZERO_ERROR); }
|
||||
#else
|
||||
/**
|
||||
* Does the error code indicate success?
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
# define U_SUCCESS(x) ((x)<=U_ZERO_ERROR)
|
||||
/**
|
||||
* Does the error code indicate a failure?
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
# define U_FAILURE(x) ((x)>U_ZERO_ERROR)
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Return a string for a UErrorCode value.
|
||||
* The string will be the same as the name of the error code constant
|
||||
* in the UErrorCode enum above.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_STABLE const char * U_EXPORT2
|
||||
u_errorName(UErrorCode code);
|
||||
|
||||
|
||||
#endif /* _UTYPES */
|
||||
2396
UnicodeConverter/icubuilds-mac/icu/icu.xcodeproj/project.pbxproj
Normal file
2396
UnicodeConverter/icubuilds-mac/icu/icu.xcodeproj/project.pbxproj
Normal file
File diff suppressed because it is too large
Load Diff
7
UnicodeConverter/icubuilds-mac/icu/icu.xcodeproj/project.xcworkspace/contents.xcworkspacedata
generated
Normal file
7
UnicodeConverter/icubuilds-mac/icu/icu.xcodeproj/project.xcworkspace/contents.xcworkspacedata
generated
Normal file
@ -0,0 +1,7 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<Workspace
|
||||
version = "1.0">
|
||||
<FileRef
|
||||
location = "self:icu.xcodeproj">
|
||||
</FileRef>
|
||||
</Workspace>
|
||||
72
UnicodeConverter/icubuilds-mac/icu/icu/common/appendable.cpp
Normal file
72
UnicodeConverter/icubuilds-mac/icu/icu/common/appendable.cpp
Normal file
@ -0,0 +1,72 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2011-2012, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
* file name: appendable.cpp
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2010dec07
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/appendable.h"
|
||||
#include "unicode/utf16.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
// Out-of-line destructor definition; the body is empty.
Appendable::~Appendable() {}
|
||||
|
||||
/*
 * Appends one code point through appendCodeUnit().
 * Values up to 0xffff are passed on as a single code unit; anything larger is
 * written as a lead unit followed by a trail unit (U16_LEAD / U16_TRAIL).
 * Returns the result of the last appendCodeUnit() call that was made; the
 * trail unit is not attempted when the lead unit was rejected.
 */
UBool
Appendable::appendCodePoint(UChar32 c) {
    if(c>0xffff) {
        // Two code units: && short-circuits after a rejected lead unit.
        return appendCodeUnit(U16_LEAD(c)) && appendCodeUnit(U16_TRAIL(c));
    }
    return appendCodeUnit((UChar)c);
}
|
||||
|
||||
/*
 * Appends a string one code unit at a time through appendCodeUnit().
 *   length<0  : s is read up to (not including) its terminating 0 unit.
 *   length==0 : nothing is read or appended.
 *   length>0  : exactly length units starting at s are appended.
 * Returns FALSE as soon as appendCodeUnit() rejects a unit, TRUE otherwise.
 */
UBool
Appendable::appendString(const UChar *s, int32_t length) {
    if(length==0) {
        return TRUE;
    }
    if(length<0) {
        // NUL-terminated input.
        for(UChar unit=*s++; unit!=0; unit=*s++) {
            if(!appendCodeUnit(unit)) {
                return FALSE;
            }
        }
        return TRUE;
    }
    // Counted input.
    for(const UChar *end=s+length; s<end; ++s) {
        if(!appendCodeUnit(*s)) {
            return FALSE;
        }
    }
    return TRUE;
}
|
||||
|
||||
// Base-class implementation: the requested capacity is ignored and TRUE is
// returned unconditionally.
UBool
|
||||
Appendable::reserveAppendCapacity(int32_t /*appendCapacity*/) {
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
/*
 * Base-class implementation: hands back the caller's scratch buffer.
 * The scratch buffer is accepted only if minCapacity is at least 1 and
 * scratchCapacity is at least minCapacity. On success *resultCapacity is set
 * to scratchCapacity and scratch is returned; otherwise *resultCapacity is
 * set to 0 and NULL is returned. The desired-capacity hint is not used.
 */
UChar *
Appendable::getAppendBuffer(int32_t minCapacity,
                            int32_t /*desiredCapacityHint*/,
                            UChar *scratch, int32_t scratchCapacity,
                            int32_t *resultCapacity) {
    const UBool usable=(minCapacity>=1 && scratchCapacity>=minCapacity);
    *resultCapacity=usable ? scratchCapacity : 0;
    return usable ? scratch : NULL;
}
|
||||
|
||||
// UnicodeStringAppendable is implemented in unistr.cpp.
|
||||
|
||||
U_NAMESPACE_END
|
||||
725
UnicodeConverter/icubuilds-mac/icu/icu/common/bmpset.cpp
Normal file
725
UnicodeConverter/icubuilds-mac/icu/icu/common/bmpset.cpp
Normal file
@ -0,0 +1,725 @@
|
||||
/*
|
||||
******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2007-2012, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
******************************************************************************
|
||||
* file name: bmpset.cpp
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2007jan29
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/uniset.h"
|
||||
#include "unicode/utf8.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "cmemory.h"
|
||||
#include "bmpset.h"
|
||||
#include "uassert.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/*
 * Builds the lookup tables for the inversion list parentList[0..parentListLength-1].
 * The list itself is referenced, not copied.
 */
BMPSet::BMPSet(const int32_t *parentList, int32_t parentListLength) :
        list(parentList), listLength(parentListLength) {
    // Start from all-zero tables; initBits() and overrideIllegal() only set what is needed.
    uprv_memset(asciiBytes, 0, sizeof(asciiBytes));
    uprv_memset(table7FF, 0, sizeof(table7FF));
    uprv_memset(bmpBlockBits, 0, sizeof(bmpBlockBits));

    /*
     * Record the list indexes that bound the binary searches for
     * U+0800, U+1000, U+2000, .., U+F000, U+10000.
     * U+0800 is the first code point that needs three UTF-8 bytes; everything
     * below it is answered from the bit tables.
     * The final pair of indexes brackets the supplementary code points.
     */
    const int32_t lastIndex=listLength-1;
    list4kStarts[0]=findCodePoint(0x800, 0, lastIndex);
    for(int32_t block=1; block<=0x10; ++block) {
        // Each search starts where the previous 4k block's search ended.
        list4kStarts[block]=findCodePoint(block<<12, list4kStarts[block-1], lastIndex);
    }
    list4kStarts[0x11]=lastIndex;

    initBits();
    overrideIllegal();
}
|
||||
|
||||
/*
 * Clones the precomputed tables of otherBMPSet while pointing this object at
 * a different inversion list (newParentList / newParentListLength).
 * Nothing is recomputed; all four tables are copied byte for byte.
 */
BMPSet::BMPSet(const BMPSet &otherBMPSet, const int32_t *newParentList, int32_t newParentListLength) :
        list(newParentList), listLength(newParentListLength) {
    uprv_memcpy(list4kStarts, otherBMPSet.list4kStarts, sizeof(list4kStarts));
    uprv_memcpy(bmpBlockBits, otherBMPSet.bmpBlockBits, sizeof(bmpBlockBits));
    uprv_memcpy(table7FF, otherBMPSet.table7FF, sizeof(table7FF));
    uprv_memcpy(asciiBytes, otherBMPSet.asciiBytes, sizeof(asciiBytes));
}
|
||||
|
||||
// Destructor with an empty body: nothing is released here, including the
// parent list that the constructors stored a pointer to.
BMPSet::~BMPSet() {
|
||||
}
|
||||
|
||||
/*
 * Set bits in a bit rectangle in "vertical" bit organization.
 * Requires start<limit<=0x800.
 *
 * For every value v in [start, limit) this sets bit (v>>6) of table[v&0x3f].
 * The work is split into a partial column for the first "lead" value, a
 * rectangle of complete columns, and a partial column for the last lead value.
 *
 * All masks are computed in unsigned arithmetic: with a shift count of 31 the
 * expression (1<<31)-1 overflows a signed int, which is undefined behavior.
 *
 * @param table 64 words, one per value of the low 6 bits
 * @param start first value to set (inclusive)
 * @param limit end of the range (exclusive)
 */
static void set32x64Bits(uint32_t table[64], int32_t start, int32_t limit) {
    U_ASSERT(start<limit);
    U_ASSERT(limit<=0x800);

    int32_t lead=start>>6;  // Named for UTF-8 2-byte lead byte with upper 5 bits.
    int32_t trail=start&0x3f;  // Named for UTF-8 2-byte trail byte with lower 6 bits.

    // Set one bit indicating an all-one block.
    uint32_t bits=(uint32_t)1<<lead;
    if((start+1)==limit) {  // Single-character shortcut.
        table[trail]|=bits;
        return;
    }

    int32_t limitLead=limit>>6;
    int32_t limitTrail=limit&0x3f;

    if(lead==limitLead) {
        // Partial vertical bit column.
        while(trail<limitTrail) {
            table[trail++]|=bits;
        }
    } else {
        // Partial vertical bit column,
        // followed by a bit rectangle,
        // followed by another partial vertical bit column.
        if(trail>0) {
            do {
                table[trail++]|=bits;
            } while(trail<64);
            ++lead;
        }
        if(lead<limitLead) {
            // Bits lead..31, then trimmed to lead..limitLead-1.
            // Unsigned 1 keeps (1<<31)-1 well defined.
            bits=~(((uint32_t)1<<lead)-1);
            if(limitLead<0x20) {
                bits&=((uint32_t)1<<limitLead)-1;
            }
            for(trail=0; trail<64; ++trail) {
                table[trail]|=bits;
            }
        }
        // limit<=0x800. If limit==0x800 then limitLead=32 and limitTrail=0.
        // A shift by 32 would be undefined, so the count is clamped to 31;
        // the resulting bits value is not used in that case because
        // trail<limitTrail is already false.
        bits=(uint32_t)1<<((limitLead == 0x20) ? (limitLead - 1) : limitLead);
        for(trail=0; trail<limitTrail; ++trail) {
            table[trail]|=bits;
        }
    }
}
|
||||
|
||||
void BMPSet::initBits() {
|
||||
UChar32 start, limit;
|
||||
int32_t listIndex=0;
|
||||
|
||||
// Set asciiBytes[].
|
||||
do {
|
||||
start=list[listIndex++];
|
||||
if(listIndex<listLength) {
|
||||
limit=list[listIndex++];
|
||||
} else {
|
||||
limit=0x110000;
|
||||
}
|
||||
if(start>=0x80) {
|
||||
break;
|
||||
}
|
||||
do {
|
||||
asciiBytes[start++]=1;
|
||||
} while(start<limit && start<0x80);
|
||||
} while(limit<=0x80);
|
||||
|
||||
// Set table7FF[].
|
||||
while(start<0x800) {
|
||||
set32x64Bits(table7FF, start, limit<=0x800 ? limit : 0x800);
|
||||
if(limit>0x800) {
|
||||
start=0x800;
|
||||
break;
|
||||
}
|
||||
|
||||
start=list[listIndex++];
|
||||
if(listIndex<listLength) {
|
||||
limit=list[listIndex++];
|
||||
} else {
|
||||
limit=0x110000;
|
||||
}
|
||||
}
|
||||
|
||||
// Set bmpBlockBits[].
|
||||
int32_t minStart=0x800;
|
||||
while(start<0x10000) {
|
||||
if(limit>0x10000) {
|
||||
limit=0x10000;
|
||||
}
|
||||
|
||||
if(start<minStart) {
|
||||
start=minStart;
|
||||
}
|
||||
if(start<limit) { // Else: Another range entirely in a known mixed-value block.
|
||||
if(start&0x3f) {
|
||||
// Mixed-value block of 64 code points.
|
||||
start>>=6;
|
||||
bmpBlockBits[start&0x3f]|=0x10001<<(start>>6);
|
||||
start=(start+1)<<6; // Round up to the next block boundary.
|
||||
minStart=start; // Ignore further ranges in this block.
|
||||
}
|
||||
if(start<limit) {
|
||||
if(start<(limit&~0x3f)) {
|
||||
// Multiple all-ones blocks of 64 code points each.
|
||||
set32x64Bits(bmpBlockBits, start>>6, limit>>6);
|
||||
}
|
||||
|
||||
if(limit&0x3f) {
|
||||
// Mixed-value block of 64 code points.
|
||||
limit>>=6;
|
||||
bmpBlockBits[limit&0x3f]|=0x10001<<(limit>>6);
|
||||
limit=(limit+1)<<6; // Round up to the next block boundary.
|
||||
minStart=limit; // Ignore further ranges in this block.
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(limit==0x10000) {
|
||||
break;
|
||||
}
|
||||
|
||||
start=list[listIndex++];
|
||||
if(listIndex<listLength) {
|
||||
limit=list[listIndex++];
|
||||
} else {
|
||||
limit=0x110000;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Override some bits and bytes to the result of contains(FFFD)
|
||||
* for faster validity checking at runtime.
|
||||
* No need to set 0 values where they were reset to 0 in the constructor
|
||||
* and not modified by initBits().
|
||||
* (asciiBytes[] trail bytes, table7FF[] 0..7F, bmpBlockBits[] 0..7FF)
|
||||
* Need to set 0 values for surrogates D800..DFFF.
|
||||
*/
|
||||
void BMPSet::overrideIllegal() {
|
||||
uint32_t bits, mask;
|
||||
int32_t i;
|
||||
|
||||
if(containsSlow(0xfffd, list4kStarts[0xf], list4kStarts[0x10])) {
|
||||
// contains(FFFD)==TRUE
|
||||
for(i=0x80; i<0xc0; ++i) {
|
||||
asciiBytes[i]=1;
|
||||
}
|
||||
|
||||
bits=3; // Lead bytes 0xC0 and 0xC1.
|
||||
for(i=0; i<64; ++i) {
|
||||
table7FF[i]|=bits;
|
||||
}
|
||||
|
||||
bits=1; // Lead byte 0xE0.
|
||||
for(i=0; i<32; ++i) { // First half of 4k block.
|
||||
bmpBlockBits[i]|=bits;
|
||||
}
|
||||
|
||||
mask=~(0x10001<<0xd); // Lead byte 0xED.
|
||||
bits=1<<0xd;
|
||||
for(i=32; i<64; ++i) { // Second half of 4k block.
|
||||
bmpBlockBits[i]=(bmpBlockBits[i]&mask)|bits;
|
||||
}
|
||||
} else {
|
||||
// contains(FFFD)==FALSE
|
||||
mask=~(0x10001<<0xd); // Lead byte 0xED.
|
||||
for(i=32; i<64; ++i) { // Second half of 4k block.
|
||||
bmpBlockBits[i]&=mask;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Binary search in list[] restricted to the index range lo..hi.
 * Returns the smallest index i with c < list[i]; lo is returned at once when
 * c < list[lo], and hi when the range is empty or c >= list[hi-1].
 *
 * Examples for the unrestricted search, findCodePoint(c):
 *   set              list[]            c=0 1 3 4 7 8
 *   ===              ==============      ===========
 *   []               [110000]            0 0 0 0 0 0
 *   [\u0000-\u0003]  [0, 4, 110000]      1 1 1 2 2 2
 *   [\u0004-\u0007]  [4, 8, 110000]      0 0 0 1 1 2
 *   [:Any:]          [0, 110000]         1 1 1 1 1 1
 *
 * Assumes list[len - 1] == HIGH and that c is legal (0..HIGH-1).
 */
int32_t BMPSet::findCodePoint(UChar32 c, int32_t lo, int32_t hi) const {
    if(c<list[lo]) {
        return lo;
    }
    // c frequently lies beyond the last range, so test that case up front.
    if(lo>=hi || c>=list[hi-1]) {
        return hi;
    }
    // From here on: list[lo] <= c < list[hi].
    int32_t mid;
    while((mid=(lo+hi)>>1)!=lo) {
        if(c<list[mid]) {
            hi=mid;
        } else {
            lo=mid;
        }
    }
    return hi;
}
|
||||
|
||||
/*
 * Tests set membership of one code point.
 * U+0000..U+007F use asciiBytes[], U+0080..U+07FF use table7FF[], other BMP
 * code points outside the surrogate range use bmpBlockBits[] and fall back to
 * a restricted list search for mixed blocks. Surrogates and supplementary
 * code points always use the list search. Values outside 0..0x10ffff yield
 * FALSE.
 */
UBool
BMPSet::contains(UChar32 c) const {
    const uint32_t cp=(uint32_t)c;
    if(cp<=0x7f) {
        return (UBool)asciiBytes[c];
    }
    if(cp<=0x7ff) {
        return (UBool)((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0);
    }
    if(cp<0xd800 || (c>=0xe000 && c<=0xffff)) {
        const int lead=c>>12;
        const uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;
        // 0 or 1: the whole 64-code point block shares one answer.
        // Anything else: mixed block, search the list within its 4k block.
        return twoBits<=1 ?
            (UBool)twoBits :
            containsSlow(c, list4kStarts[lead], list4kStarts[lead+1]);
    }
    if(cp<=0x10ffff) {
        // surrogate or supplementary code point
        return containsSlow(c, list4kStarts[0xd], list4kStarts[0x11]);
    }
    // Out-of-range code points get FALSE, consistent with long-standing
    // behavior of UnicodeSet::contains(c).
    return FALSE;
}
|
||||
|
||||
/*
|
||||
* Check for sufficient length for trail unit for each surrogate pair.
|
||||
* Handle single surrogates as surrogate code points as usual in ICU.
|
||||
*/
|
||||
const UChar *
|
||||
BMPSet::span(const UChar *s, const UChar *limit, USetSpanCondition spanCondition) const {
|
||||
UChar c, c2;
|
||||
|
||||
if(spanCondition) {
|
||||
// span
|
||||
do {
|
||||
c=*s;
|
||||
if(c<=0x7f) {
|
||||
if(!asciiBytes[c]) {
|
||||
break;
|
||||
}
|
||||
} else if(c<=0x7ff) {
|
||||
if((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))==0) {
|
||||
break;
|
||||
}
|
||||
} else if(c<0xd800 || c>=0xe000) {
|
||||
int lead=c>>12;
|
||||
uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;
|
||||
if(twoBits<=1) {
|
||||
// All 64 code points with the same bits 15..6
|
||||
// are either in the set or not.
|
||||
if(twoBits==0) {
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
// Look up the code point in its 4k block of code points.
|
||||
if(!containsSlow(c, list4kStarts[lead], list4kStarts[lead+1])) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else if(c>=0xdc00 || (s+1)==limit || (c2=s[1])<0xdc00 || c2>=0xe000) {
|
||||
// surrogate code point
|
||||
if(!containsSlow(c, list4kStarts[0xd], list4kStarts[0xe])) {
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
// surrogate pair
|
||||
if(!containsSlow(U16_GET_SUPPLEMENTARY(c, c2), list4kStarts[0x10], list4kStarts[0x11])) {
|
||||
break;
|
||||
}
|
||||
++s;
|
||||
}
|
||||
} while(++s<limit);
|
||||
} else {
|
||||
// span not
|
||||
do {
|
||||
c=*s;
|
||||
if(c<=0x7f) {
|
||||
if(asciiBytes[c]) {
|
||||
break;
|
||||
}
|
||||
} else if(c<=0x7ff) {
|
||||
if((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0) {
|
||||
break;
|
||||
}
|
||||
} else if(c<0xd800 || c>=0xe000) {
|
||||
int lead=c>>12;
|
||||
uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;
|
||||
if(twoBits<=1) {
|
||||
// All 64 code points with the same bits 15..6
|
||||
// are either in the set or not.
|
||||
if(twoBits!=0) {
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
// Look up the code point in its 4k block of code points.
|
||||
if(containsSlow(c, list4kStarts[lead], list4kStarts[lead+1])) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else if(c>=0xdc00 || (s+1)==limit || (c2=s[1])<0xdc00 || c2>=0xe000) {
|
||||
// surrogate code point
|
||||
if(containsSlow(c, list4kStarts[0xd], list4kStarts[0xe])) {
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
// surrogate pair
|
||||
if(containsSlow(U16_GET_SUPPLEMENTARY(c, c2), list4kStarts[0x10], list4kStarts[0x11])) {
|
||||
break;
|
||||
}
|
||||
++s;
|
||||
}
|
||||
} while(++s<limit);
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
/* Symmetrical with span(). */
|
||||
const UChar *
|
||||
BMPSet::spanBack(const UChar *s, const UChar *limit, USetSpanCondition spanCondition) const {
|
||||
UChar c, c2;
|
||||
|
||||
if(spanCondition) {
|
||||
// span
|
||||
for(;;) {
|
||||
c=*(--limit);
|
||||
if(c<=0x7f) {
|
||||
if(!asciiBytes[c]) {
|
||||
break;
|
||||
}
|
||||
} else if(c<=0x7ff) {
|
||||
if((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))==0) {
|
||||
break;
|
||||
}
|
||||
} else if(c<0xd800 || c>=0xe000) {
|
||||
int lead=c>>12;
|
||||
uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;
|
||||
if(twoBits<=1) {
|
||||
// All 64 code points with the same bits 15..6
|
||||
// are either in the set or not.
|
||||
if(twoBits==0) {
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
// Look up the code point in its 4k block of code points.
|
||||
if(!containsSlow(c, list4kStarts[lead], list4kStarts[lead+1])) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else if(c<0xdc00 || s==limit || (c2=*(limit-1))<0xd800 || c2>=0xdc00) {
|
||||
// surrogate code point
|
||||
if(!containsSlow(c, list4kStarts[0xd], list4kStarts[0xe])) {
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
// surrogate pair
|
||||
if(!containsSlow(U16_GET_SUPPLEMENTARY(c2, c), list4kStarts[0x10], list4kStarts[0x11])) {
|
||||
break;
|
||||
}
|
||||
--limit;
|
||||
}
|
||||
if(s==limit) {
|
||||
return s;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// span not
|
||||
for(;;) {
|
||||
c=*(--limit);
|
||||
if(c<=0x7f) {
|
||||
if(asciiBytes[c]) {
|
||||
break;
|
||||
}
|
||||
} else if(c<=0x7ff) {
|
||||
if((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0) {
|
||||
break;
|
||||
}
|
||||
} else if(c<0xd800 || c>=0xe000) {
|
||||
int lead=c>>12;
|
||||
uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;
|
||||
if(twoBits<=1) {
|
||||
// All 64 code points with the same bits 15..6
|
||||
// are either in the set or not.
|
||||
if(twoBits!=0) {
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
// Look up the code point in its 4k block of code points.
|
||||
if(containsSlow(c, list4kStarts[lead], list4kStarts[lead+1])) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else if(c<0xdc00 || s==limit || (c2=*(limit-1))<0xd800 || c2>=0xdc00) {
|
||||
// surrogate code point
|
||||
if(containsSlow(c, list4kStarts[0xd], list4kStarts[0xe])) {
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
// surrogate pair
|
||||
if(containsSlow(U16_GET_SUPPLEMENTARY(c2, c), list4kStarts[0x10], list4kStarts[0x11])) {
|
||||
break;
|
||||
}
|
||||
--limit;
|
||||
}
|
||||
if(s==limit) {
|
||||
return s;
|
||||
}
|
||||
}
|
||||
}
|
||||
return limit+1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Precheck for sufficient trail bytes at end of string only once per span.
|
||||
* Check validity.
|
||||
*/
|
||||
const uint8_t *
|
||||
BMPSet::spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const {
|
||||
const uint8_t *limit=s+length;
|
||||
uint8_t b=*s;
|
||||
if((int8_t)b>=0) {
|
||||
// Initial all-ASCII span.
|
||||
if(spanCondition) {
|
||||
do {
|
||||
if(!asciiBytes[b] || ++s==limit) {
|
||||
return s;
|
||||
}
|
||||
b=*s;
|
||||
} while((int8_t)b>=0);
|
||||
} else {
|
||||
do {
|
||||
if(asciiBytes[b] || ++s==limit) {
|
||||
return s;
|
||||
}
|
||||
b=*s;
|
||||
} while((int8_t)b>=0);
|
||||
}
|
||||
length=(int32_t)(limit-s);
|
||||
}
|
||||
|
||||
if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
|
||||
spanCondition=USET_SPAN_CONTAINED; // Pin to 0/1 values.
|
||||
}
|
||||
|
||||
const uint8_t *limit0=limit;
|
||||
|
||||
/*
|
||||
* Make sure that the last 1/2/3/4-byte sequence before limit is complete
|
||||
* or runs into a lead byte.
|
||||
* In the span loop compare s with limit only once
|
||||
* per multi-byte character.
|
||||
*
|
||||
* Give a trailing illegal sequence the same value as the result of contains(FFFD),
|
||||
* including it if that is part of the span, otherwise set limit0 to before
|
||||
* the truncated sequence.
|
||||
*/
|
||||
b=*(limit-1);
|
||||
if((int8_t)b<0) {
|
||||
// b>=0x80: lead or trail byte
|
||||
if(b<0xc0) {
|
||||
// single trail byte, check for preceding 3- or 4-byte lead byte
|
||||
if(length>=2 && (b=*(limit-2))>=0xe0) {
|
||||
limit-=2;
|
||||
if(asciiBytes[0x80]!=spanCondition) {
|
||||
limit0=limit;
|
||||
}
|
||||
} else if(b<0xc0 && b>=0x80 && length>=3 && (b=*(limit-3))>=0xf0) {
|
||||
// 4-byte lead byte with only two trail bytes
|
||||
limit-=3;
|
||||
if(asciiBytes[0x80]!=spanCondition) {
|
||||
limit0=limit;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// lead byte with no trail bytes
|
||||
--limit;
|
||||
if(asciiBytes[0x80]!=spanCondition) {
|
||||
limit0=limit;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
uint8_t t1, t2, t3;
|
||||
|
||||
while(s<limit) {
|
||||
b=*s;
|
||||
if(b<0xc0) {
|
||||
// ASCII; or trail bytes with the result of contains(FFFD).
|
||||
if(spanCondition) {
|
||||
do {
|
||||
if(!asciiBytes[b]) {
|
||||
return s;
|
||||
} else if(++s==limit) {
|
||||
return limit0;
|
||||
}
|
||||
b=*s;
|
||||
} while(b<0xc0);
|
||||
} else {
|
||||
do {
|
||||
if(asciiBytes[b]) {
|
||||
return s;
|
||||
} else if(++s==limit) {
|
||||
return limit0;
|
||||
}
|
||||
b=*s;
|
||||
} while(b<0xc0);
|
||||
}
|
||||
}
|
||||
++s; // Advance past the lead byte.
|
||||
if(b>=0xe0) {
|
||||
if(b<0xf0) {
|
||||
if( /* handle U+0000..U+FFFF inline */
|
||||
(t1=(uint8_t)(s[0]-0x80)) <= 0x3f &&
|
||||
(t2=(uint8_t)(s[1]-0x80)) <= 0x3f
|
||||
) {
|
||||
b&=0xf;
|
||||
uint32_t twoBits=(bmpBlockBits[t1]>>b)&0x10001;
|
||||
if(twoBits<=1) {
|
||||
// All 64 code points with this lead byte and middle trail byte
|
||||
// are either in the set or not.
|
||||
if(twoBits!=(uint32_t)spanCondition) {
|
||||
return s-1;
|
||||
}
|
||||
} else {
|
||||
// Look up the code point in its 4k block of code points.
|
||||
UChar32 c=(b<<12)|(t1<<6)|t2;
|
||||
if(containsSlow(c, list4kStarts[b], list4kStarts[b+1]) != spanCondition) {
|
||||
return s-1;
|
||||
}
|
||||
}
|
||||
s+=2;
|
||||
continue;
|
||||
}
|
||||
} else if( /* handle U+10000..U+10FFFF inline */
|
||||
(t1=(uint8_t)(s[0]-0x80)) <= 0x3f &&
|
||||
(t2=(uint8_t)(s[1]-0x80)) <= 0x3f &&
|
||||
(t3=(uint8_t)(s[2]-0x80)) <= 0x3f
|
||||
) {
|
||||
// Give an illegal sequence the same value as the result of contains(FFFD).
|
||||
UChar32 c=((UChar32)(b-0xf0)<<18)|((UChar32)t1<<12)|(t2<<6)|t3;
|
||||
if( ( (0x10000<=c && c<=0x10ffff) ?
|
||||
containsSlow(c, list4kStarts[0x10], list4kStarts[0x11]) :
|
||||
asciiBytes[0x80]
|
||||
) != spanCondition
|
||||
) {
|
||||
return s-1;
|
||||
}
|
||||
s+=3;
|
||||
continue;
|
||||
}
|
||||
} else /* 0xc0<=b<0xe0 */ {
|
||||
if( /* handle U+0000..U+07FF inline */
|
||||
(t1=(uint8_t)(*s-0x80)) <= 0x3f
|
||||
) {
|
||||
if((USetSpanCondition)((table7FF[t1]&((uint32_t)1<<(b&0x1f)))!=0) != spanCondition) {
|
||||
return s-1;
|
||||
}
|
||||
++s;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// Give an illegal sequence the same value as the result of contains(FFFD).
|
||||
// Handle each byte of an illegal sequence separately to simplify the code;
|
||||
// no need to optimize error handling.
|
||||
if(asciiBytes[0x80]!=spanCondition) {
|
||||
return s-1;
|
||||
}
|
||||
}
|
||||
|
||||
return limit0;
|
||||
}
|
||||
|
||||
/*
|
||||
* While going backwards through UTF-8 optimize only for ASCII.
|
||||
* Unlike UTF-16, UTF-8 is not forward-backward symmetrical, that is, it is not
|
||||
* possible to tell from the last byte in a multi-byte sequence how many
|
||||
* preceding bytes there should be. Therefore, going backwards through UTF-8
|
||||
* is much harder than going forward.
|
||||
*/
|
||||
int32_t
|
||||
BMPSet::spanBackUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const {
|
||||
if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
|
||||
spanCondition=USET_SPAN_CONTAINED; // Pin to 0/1 values.
|
||||
}
|
||||
|
||||
uint8_t b;
|
||||
|
||||
do {
|
||||
b=s[--length];
|
||||
if((int8_t)b>=0) {
|
||||
// ASCII sub-span
|
||||
if(spanCondition) {
|
||||
do {
|
||||
if(!asciiBytes[b]) {
|
||||
return length+1;
|
||||
} else if(length==0) {
|
||||
return 0;
|
||||
}
|
||||
b=s[--length];
|
||||
} while((int8_t)b>=0);
|
||||
} else {
|
||||
do {
|
||||
if(asciiBytes[b]) {
|
||||
return length+1;
|
||||
} else if(length==0) {
|
||||
return 0;
|
||||
}
|
||||
b=s[--length];
|
||||
} while((int8_t)b>=0);
|
||||
}
|
||||
}
|
||||
|
||||
int32_t prev=length;
|
||||
UChar32 c;
|
||||
// trail byte: collect a multi-byte character
|
||||
// (or lead byte in last-trail position)
|
||||
c=utf8_prevCharSafeBody(s, 0, &length, b, -3);
|
||||
// c is a valid code point, not ASCII, not a surrogate
|
||||
if(c<=0x7ff) {
|
||||
if((USetSpanCondition)((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0) != spanCondition) {
|
||||
return prev+1;
|
||||
}
|
||||
} else if(c<=0xffff) {
|
||||
int lead=c>>12;
|
||||
uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;
|
||||
if(twoBits<=1) {
|
||||
// All 64 code points with the same bits 15..6
|
||||
// are either in the set or not.
|
||||
if(twoBits!=(uint32_t)spanCondition) {
|
||||
return prev+1;
|
||||
}
|
||||
} else {
|
||||
// Look up the code point in its 4k block of code points.
|
||||
if(containsSlow(c, list4kStarts[lead], list4kStarts[lead+1]) != spanCondition) {
|
||||
return prev+1;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if(containsSlow(c, list4kStarts[0x10], list4kStarts[0x11]) != spanCondition) {
|
||||
return prev+1;
|
||||
}
|
||||
}
|
||||
} while(length>0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
161
UnicodeConverter/icubuilds-mac/icu/icu/common/bmpset.h
Normal file
161
UnicodeConverter/icubuilds-mac/icu/icu/common/bmpset.h
Normal file
@ -0,0 +1,161 @@
|
||||
/*
|
||||
******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2007, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
******************************************************************************
|
||||
* file name: bmpset.h
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2007jan29
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
#ifndef __BMPSET_H__
|
||||
#define __BMPSET_H__
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/uniset.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/*
|
||||
* Helper class for frozen UnicodeSets, implements contains() and span()
|
||||
* optimized for BMP code points. Structured to be UTF-8-friendly.
|
||||
*
|
||||
* ASCII: Look up bytes.
|
||||
* 2-byte characters: Bits organized vertically.
|
||||
* 3-byte characters: Use zero/one/mixed data per 64-block in U+0000..U+FFFF,
|
||||
* with mixed for illegal ranges.
|
||||
* Supplementary characters: Call contains() on the parent set.
|
||||
*/
|
||||
class BMPSet : public UMemory {
|
||||
public:
|
||||
BMPSet(const int32_t *parentList, int32_t parentListLength);
|
||||
BMPSet(const BMPSet &otherBMPSet, const int32_t *newParentList, int32_t newParentListLength);
|
||||
virtual ~BMPSet();
|
||||
|
||||
virtual UBool contains(UChar32 c) const;
|
||||
|
||||
/*
|
||||
* Span the initial substring for which each character c has spanCondition==contains(c).
|
||||
* It must be s<limit and spanCondition==0 or 1.
|
||||
* @return The string pointer which limits the span.
|
||||
*/
|
||||
const UChar *span(const UChar *s, const UChar *limit, USetSpanCondition spanCondition) const;
|
||||
|
||||
/*
|
||||
* Span the trailing substring for which each character c has spanCondition==contains(c).
|
||||
* It must be s<limit and spanCondition==0 or 1.
|
||||
* @return The string pointer which starts the span.
|
||||
*/
|
||||
const UChar *spanBack(const UChar *s, const UChar *limit, USetSpanCondition spanCondition) const;
|
||||
|
||||
/*
|
||||
* Span the initial substring for which each character c has spanCondition==contains(c).
|
||||
* It must be length>0 and spanCondition==0 or 1.
|
||||
* @return The string pointer which limits the span.
|
||||
*/
|
||||
const uint8_t *spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const;
|
||||
|
||||
/*
|
||||
* Span the trailing substring for which each character c has spanCondition==contains(c).
|
||||
* It must be length>0 and spanCondition==0 or 1.
|
||||
* @return The start of the span.
|
||||
*/
|
||||
int32_t spanBackUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const;
|
||||
|
||||
private:
|
||||
void initBits();
|
||||
void overrideIllegal();
|
||||
|
||||
/**
|
||||
* Same as UnicodeSet::findCodePoint(UChar32 c) const except that the
|
||||
* binary search is restricted for finding code points in a certain range.
|
||||
*
|
||||
* For restricting the search for finding in the range start..end,
|
||||
* pass in
|
||||
* lo=findCodePoint(start) and
|
||||
* hi=findCodePoint(end)
|
||||
* with 0<=lo<=hi<len.
|
||||
* findCodePoint(c) defaults to lo=0 and hi=len-1.
|
||||
*
|
||||
* @param c a character in a subrange of MIN_VALUE..MAX_VALUE
|
||||
* @param lo The lowest index to be returned.
|
||||
* @param hi The highest index to be returned.
|
||||
* @return the smallest integer i in the range lo..hi,
|
||||
* inclusive, such that c < list[i]
|
||||
*/
|
||||
int32_t findCodePoint(UChar32 c, int32_t lo, int32_t hi) const;
|
||||
|
||||
inline UBool containsSlow(UChar32 c, int32_t lo, int32_t hi) const;
|
||||
|
||||
/*
|
||||
* One byte per ASCII character, or trail byte in lead position.
|
||||
* 0 or 1 for ASCII characters.
|
||||
* The value for trail bytes is the result of contains(FFFD)
|
||||
* for faster validity checking at runtime.
|
||||
*/
|
||||
UBool asciiBytes[0xc0];
|
||||
|
||||
/*
|
||||
* One bit per code point from U+0000..U+07FF.
|
||||
* The bits are organized vertically; consecutive code points
|
||||
* correspond to the same bit positions in consecutive table words.
|
||||
* With code point parts
|
||||
* lead=c{10..6}
|
||||
* trail=c{5..0}
|
||||
* it is set.contains(c)==(table7FF[trail] bit lead)
|
||||
*
|
||||
* Bits for 0..7F (non-shortest forms) are set to the result of contains(FFFD)
|
||||
* for faster validity checking at runtime.
|
||||
*/
|
||||
uint32_t table7FF[64];
|
||||
|
||||
/*
|
||||
* One bit per 64 BMP code points.
|
||||
* The bits are organized vertically; consecutive 64-code point blocks
|
||||
* correspond to the same bit position in consecutive table words.
|
||||
* With code point parts
|
||||
* lead=c{15..12}
|
||||
* t1=c{11..6}
|
||||
* test bits (lead+16) and lead in bmpBlockBits[t1].
|
||||
* If the upper bit is 0, then the lower bit indicates if contains(c)
|
||||
* for all code points in the 64-block.
|
||||
* If the upper bit is 1, then the block is mixed and set.contains(c)
|
||||
* must be called.
|
||||
*
|
||||
* Bits for 0..7FF (non-shortest forms) and D800..DFFF are set to
|
||||
* the result of contains(FFFD) for faster validity checking at runtime.
|
||||
*/
|
||||
uint32_t bmpBlockBits[64];
|
||||
|
||||
/*
|
||||
* Inversion list indexes for restricted binary searches in
|
||||
* findCodePoint(), from
|
||||
* findCodePoint(U+0800, U+1000, U+2000, .., U+F000, U+10000).
|
||||
* U+0800 is the first 3-byte-UTF-8 code point. Code points below U+0800 are
|
||||
* always looked up in the bit tables.
|
||||
* The last pair of indexes is for finding supplementary code points.
|
||||
*/
|
||||
int32_t list4kStarts[18];
|
||||
|
||||
/*
|
||||
* The inversion list of the parent set, for the slower contains() implementation
|
||||
* for mixed BMP blocks and for supplementary code points.
|
||||
* The list is terminated with list[listLength-1]=0x110000.
|
||||
*/
|
||||
const int32_t *list;
|
||||
int32_t listLength;
|
||||
};
|
||||
|
||||
/*
 * Slow-path membership test: looks c up in the inversion list, restricted to
 * indexes lo..hi, and reports the low bit of the index that was found.
 */
inline UBool BMPSet::containsSlow(UChar32 c, int32_t lo, int32_t hi) const {
    const int32_t index = findCodePoint(c, lo, hi);
    return (UBool)(index & 1);
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif
|
||||
300
UnicodeConverter/icubuilds-mac/icu/icu/common/brkeng.cpp
Normal file
300
UnicodeConverter/icubuilds-mac/icu/icu/common/brkeng.cpp
Normal file
@ -0,0 +1,300 @@
|
||||
/*
|
||||
************************************************************************************
|
||||
* Copyright (C) 2006-2015, International Business Machines Corporation
|
||||
* and others. All Rights Reserved.
|
||||
************************************************************************************
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
#include "brkeng.h"
|
||||
#include "dictbe.h"
|
||||
#include "unicode/uchar.h"
|
||||
#include "unicode/uniset.h"
|
||||
#include "unicode/chariter.h"
|
||||
#include "unicode/ures.h"
|
||||
#include "unicode/udata.h"
|
||||
#include "unicode/putil.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "unicode/uscript.h"
|
||||
#include "unicode/ucharstrie.h"
|
||||
#include "unicode/bytestrie.h"
|
||||
#include "charstr.h"
|
||||
#include "dictionarydata.h"
|
||||
#include "mutex.h"
|
||||
#include "uvector.h"
|
||||
#include "umutex.h"
|
||||
#include "uresimp.h"
|
||||
#include "ubrkimpl.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/*
|
||||
******************************************************************
|
||||
*/
|
||||
|
||||
// The abstract base has no state of its own: both the constructor and the
// destructor are intentionally empty.
LanguageBreakEngine::LanguageBreakEngine() {}

LanguageBreakEngine::~LanguageBreakEngine() {}
|
||||
|
||||
/*
|
||||
******************************************************************
|
||||
*/
|
||||
|
||||
// The abstract factory base has no state of its own: both the constructor and
// the destructor are intentionally empty.
LanguageBreakFactory::LanguageBreakFactory() {}

LanguageBreakFactory::~LanguageBreakFactory() {}
|
||||
|
||||
/*
|
||||
******************************************************************
|
||||
*/
|
||||
|
||||
// Starts with no per-break-type set allocated; the error code is unused.
UnhandledEngine::UnhandledEngine(UErrorCode &/*status*/) {
    const int32_t kSetCount = (int32_t)(sizeof(fHandled)/sizeof(fHandled[0]));
    int32_t remaining = kSetCount;
    while (remaining-- > 0) {
        fHandled[remaining] = 0;
    }
}
|
||||
|
||||
// Releases every per-break-type set that was allocated.
UnhandledEngine::~UnhandledEngine() {
    const int32_t kSetCount = (int32_t)(sizeof(fHandled)/sizeof(fHandled[0]));
    int32_t slot = 0;
    while (slot < kSetCount) {
        // Deleting a null pointer is a no-op, so unused slots need no test.
        delete fHandled[slot];
        ++slot;
    }
}
|
||||
|
||||
// Reports whether c is in the set recorded for breakType.
// Out-of-range break types and break types with no set yet yield FALSE.
UBool
UnhandledEngine::handles(UChar32 c, int32_t breakType) const {
    const int32_t kSetCount = (int32_t)(sizeof(fHandled)/sizeof(fHandled[0]));
    if (breakType < 0 || breakType >= kSetCount) {
        return FALSE;
    }
    const UnicodeSet *handled = fHandled[breakType];
    return (UBool)(handled != 0 && handled->contains(c));
}
|
||||
|
||||
// Skips over the run of characters this engine has been told to handle.
// No break positions are ever produced: foundBreaks is untouched and the
// return value is always 0. The only effect is to move the text iterator.
//
// @param text       The text; its position is advanced (or moved backwards
//                   when reverse is set) past the handled run.
// @param startPos   Lower bound for the backward scan.
// @param endPos     Upper bound for the forward scan.
// @param reverse    Scan direction: TRUE scans backwards.
// @param breakType  Index of the per-break-type set to consult. Out-of-range
//                   values, and break types with no set allocated, leave the
//                   text untouched.
// @return Always 0.
int32_t
UnhandledEngine::findBreaks( UText *text,
                             int32_t startPos,
                             int32_t endPos,
                             UBool reverse,
                             int32_t breakType,
                             UStack &/*foundBreaks*/ ) const {
    const int32_t kSetCount = (int32_t)(sizeof(fHandled)/sizeof(fHandled[0]));
    if (breakType < 0 || breakType >= kSetCount) {
        return 0;
    }

    // The slot is only populated by handleCharacter(), and stays NULL if that
    // was never called for this break type or if its allocation failed.
    // handles() guards against this; do the same here instead of
    // dereferencing a null set.
    const UnicodeSet *handled = fHandled[breakType];
    if (handled == NULL) {
        return 0;
    }

    UChar32 c = utext_current32(text);
    if (reverse) {
        while((int32_t)utext_getNativeIndex(text) > startPos && handled->contains(c)) {
            c = utext_previous32(text);
        }
    }
    else {
        while((int32_t)utext_getNativeIndex(text) < endPos && handled->contains(c)) {
            utext_next32(text);            // TODO:  recast loop to work with post-increment operations.
            c = utext_current32(text);
        }
    }
    return 0;
}
|
||||
|
||||
void
|
||||
UnhandledEngine::handleCharacter(UChar32 c, int32_t breakType) {
|
||||
if (breakType >= 0 && breakType < (int32_t)(sizeof(fHandled)/sizeof(fHandled[0]))) {
|
||||
if (fHandled[breakType] == 0) {
|
||||
fHandled[breakType] = new UnicodeSet();
|
||||
if (fHandled[breakType] == 0) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
if (!fHandled[breakType]->contains(c)) {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
// Apply the entire script of the character.
|
||||
int32_t script = u_getIntPropertyValue(c, UCHAR_SCRIPT);
|
||||
fHandled[breakType]->applyIntPropertyValue(UCHAR_SCRIPT, script, status);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
******************************************************************
|
||||
*/
|
||||
|
||||
// The engine stack is created lazily, so construction only clears the pointer.
// The error code is unused.
ICULanguageBreakFactory::ICULanguageBreakFactory(UErrorCode &/*status*/) {
    fEngines = 0;
}

// Deletes the engine stack if one was ever created.
ICULanguageBreakFactory::~ICULanguageBreakFactory() {
    // Deleting a null pointer is a no-op, so no explicit test is needed.
    delete fEngines;
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
U_CDECL_BEGIN
|
||||
// Deleter callback handed to the engine UStack: destroys one stored engine.
static void U_CALLCONV _deleteEngine(void *obj) {
    const icu::LanguageBreakEngine *engine = (const icu::LanguageBreakEngine *) obj;
    delete engine;
}
|
||||
U_CDECL_END
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
static UMutex gBreakEngineMutex = U_MUTEX_INITIALIZER;
|
||||
|
||||
// Returns an engine that handles c for breakType, creating and caching one
// if none of the engines created so far accepts the character.
// Returns NULL when the engine stack cannot be created or when no engine can
// be loaded for the character.
const LanguageBreakEngine *
ICULanguageBreakFactory::getEngineFor(UChar32 c, int32_t breakType) {
    UErrorCode status = U_ZERO_ERROR;

    // fEngines is read and modified below; the Mutex object is held until return.
    Mutex m(&gBreakEngineMutex);

    if (fEngines != NULL) {
        // Walk the existing engines from the most recently pushed one down.
        for (int32_t i = fEngines->size() - 1; i >= 0; --i) {
            const LanguageBreakEngine *candidate =
                (const LanguageBreakEngine *)(fEngines->elementAt(i));
            if (candidate != NULL && candidate->handles(c, breakType)) {
                return candidate;
            }
        }
    } else {
        // First call: create the stack that will hold the engines.
        UStack *engines = new UStack(_deleteEngine, NULL, status);
        if (U_FAILURE(status) || engines == NULL) {
            // Note: no way to return error code to caller.
            delete engines;
            return NULL;
        }
        fEngines = engines;
    }

    // We didn't find an engine. Create one.
    const LanguageBreakEngine *created = loadEngineFor(c, breakType);
    if (created != NULL) {
        fEngines->push((void *)created, status);
    }
    return created;
}
|
||||
|
||||
// Builds a dictionary-based break engine for the script of c.
// Returns NULL if the script lookup fails, if no dictionary matcher is
// available for the script, if the script has no engine type here, or if
// constructing the engine reports an error.
const LanguageBreakEngine *
ICULanguageBreakFactory::loadEngineFor(UChar32 c, int32_t breakType) {
    UErrorCode status = U_ZERO_ERROR;
    UScriptCode code = uscript_getScript(c, &status);
    if (U_FAILURE(status)) {
        return NULL;
    }

    DictionaryMatcher *m = loadDictionaryMatcherFor(code, breakType);
    if (m == NULL) {
        return NULL;
    }

    // The matcher is passed to whichever engine gets constructed below.
    const LanguageBreakEngine *engine = NULL;
    switch(code) {
    case USCRIPT_THAI:
        engine = new ThaiBreakEngine(m, status);
        break;
    case USCRIPT_LAO:
        engine = new LaoBreakEngine(m, status);
        break;
    case USCRIPT_MYANMAR:
        engine = new BurmeseBreakEngine(m, status);
        break;
    case USCRIPT_KHMER:
        engine = new KhmerBreakEngine(m, status);
        break;

#if !UCONFIG_NO_NORMALIZATION
    // CJK not available w/o normalization
    case USCRIPT_HANGUL:
        engine = new CjkBreakEngine(m, kKorean, status);
        break;

    // use same BreakEngine and dictionary for both Chinese and Japanese
    case USCRIPT_HIRAGANA:
    case USCRIPT_KATAKANA:
    case USCRIPT_HAN:
        engine = new CjkBreakEngine(m, kChineseJapanese, status);
        break;
#if 0
    // TODO: Have to get some characters with script=common handled
    // by CjkBreakEngine (e.g. U+309B). Simply subjecting
    // them to CjkBreakEngine does not work. The engine has to
    // special-case them.
    case USCRIPT_COMMON:
    {
        UBlockCode block = ublock_getCode(code);
        if (block == UBLOCK_HIRAGANA || block == UBLOCK_KATAKANA)
            engine = new CjkBreakEngine(dict, kChineseJapanese, status);
        break;
    }
#endif
#endif

    default:
        break;
    }

    if (engine == NULL) {
        // No engine was created, so the matcher is still ours to release.
        delete m;
        return NULL;
    }
    if (U_FAILURE(status)) {
        // Only the engine is deleted here, exactly as before the restyle.
        delete engine;
        return NULL;
    }
    return engine;
}
|
||||
|
||||
// Looks up the dictionary file registered for the script in the break
// iterator resource tree, opens it and wraps its trie in a matcher.
// The break type parameter is unused.
// Returns NULL if the resource lookup fails, the data file cannot be opened,
// the trie type is not recognised, or the matcher cannot be allocated.
DictionaryMatcher *
ICULanguageBreakFactory::loadDictionaryMatcherFor(UScriptCode script, int32_t /* brkType */) {
    UErrorCode status = U_ZERO_ERROR;

    // open root from brkitr tree.
    UResourceBundle *b = ures_open(U_ICUDATA_BRKITR, "", &status);
    b = ures_getByKeyWithFallback(b, "dictionaries", b, &status);

    int32_t dictnlength = 0;
    const UChar *dictfname =
        ures_getStringByKeyWithFallback(b, uscript_getShortName(script), &dictnlength, &status);
    if (U_FAILURE(status)) {
        ures_close(b);
        return NULL;
    }

    // Split "name.ext" at the last dot into the two pieces udata_open() wants.
    CharString dictnbuf;
    CharString ext;
    const UChar *lastDot = u_memrchr(dictfname, 0x002e, dictnlength);
    if (lastDot != NULL) {
        const int32_t baseLength = (int32_t)(lastDot - dictfname);
        ext.appendInvariantChars(
            UnicodeString(FALSE, lastDot + 1, dictnlength - baseLength - 1), status);
        dictnlength = baseLength;
    }
    dictnbuf.appendInvariantChars(UnicodeString(FALSE, dictfname, dictnlength), status);
    ures_close(b);

    UDataMemory *file = udata_open(U_ICUDATA_BRKITR, ext.data(), dictnbuf.data(), &status);
    if (U_FAILURE(status)) {
        // we don't have a dictionary matcher.
        // returning NULL here will cause us to fail to find a dictionary break engine, as expected
        return NULL;
    }

    // build trie
    const uint8_t *data = (const uint8_t *)udata_getMemory(file);
    const int32_t *indexes = (const int32_t *)data;
    const int32_t offset = indexes[DictionaryData::IX_STRING_TRIE_OFFSET];
    const int32_t trieType = indexes[DictionaryData::IX_TRIE_TYPE] & DictionaryData::TRIE_TYPE_MASK;

    DictionaryMatcher *matcher = NULL;
    if (trieType == DictionaryData::TRIE_TYPE_BYTES) {
        const int32_t transform = indexes[DictionaryData::IX_TRANSFORM];
        const char *characters = (const char *)(data + offset);
        matcher = new BytesDictionaryMatcher(characters, transform, file);
    }
    else if (trieType == DictionaryData::TRIE_TYPE_UCHARS) {
        const UChar *characters = (const UChar *)(data + offset);
        matcher = new UCharsDictionaryMatcher(characters, file);
    }

    if (matcher == NULL) {
        // no matcher exists to take ownership - either we are an invalid
        // type or memory allocation failed
        udata_close(file);
    }
    return matcher;
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
|
||||
289
UnicodeConverter/icubuilds-mac/icu/icu/common/brkeng.h
Normal file
289
UnicodeConverter/icubuilds-mac/icu/icu/common/brkeng.h
Normal file
@ -0,0 +1,289 @@
|
||||
/**
|
||||
************************************************************************************
|
||||
* Copyright (C) 2006-2012, International Business Machines Corporation and others. *
|
||||
* All Rights Reserved. *
|
||||
************************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef BRKENG_H
|
||||
#define BRKENG_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/uobject.h"
|
||||
#include "unicode/utext.h"
|
||||
#include "unicode/uscript.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
class UnicodeSet;
|
||||
class UStack;
|
||||
class DictionaryMatcher;
|
||||
|
||||
/*******************************************************************
|
||||
* LanguageBreakEngine
|
||||
*/
|
||||
|
||||
/**
|
||||
* <p>LanguageBreakEngines implement language-specific knowledge for
|
||||
* finding text boundaries within a run of characters belonging to a
|
||||
* specific set. The boundaries will be of a specific kind, e.g. word,
|
||||
* line, etc.</p>
|
||||
*
|
||||
* <p>LanguageBreakEngines should normally be implemented so as to
|
||||
* be shared between threads without locking.</p>
|
||||
*/
|
||||
class LanguageBreakEngine : public UMemory {
|
||||
public:
|
||||
|
||||
/**
|
||||
* <p>Default constructor.</p>
|
||||
*
|
||||
*/
|
||||
LanguageBreakEngine();
|
||||
|
||||
/**
|
||||
* <p>Virtual destructor.</p>
|
||||
*/
|
||||
virtual ~LanguageBreakEngine();
|
||||
|
||||
/**
|
||||
* <p>Indicate whether this engine handles a particular character for
|
||||
* a particular kind of break.</p>
|
||||
*
|
||||
* @param c A character which begins a run that the engine might handle
|
||||
* @param breakType The type of text break which the caller wants to determine
|
||||
* @return TRUE if this engine handles the particular character and break
|
||||
* type.
|
||||
*/
|
||||
virtual UBool handles(UChar32 c, int32_t breakType) const = 0;
|
||||
|
||||
/**
|
||||
* <p>Find any breaks within a run in the supplied text.</p>
|
||||
*
|
||||
* @param text A UText representing the text. The
|
||||
* iterator is left at the end of the run of characters which the engine
|
||||
* is capable of handling.
|
||||
* @param startPos The start of the run within the supplied text.
|
||||
* @param endPos The end of the run within the supplied text.
|
||||
* @param reverse Whether the caller is looking for breaks in a reverse
|
||||
* direction.
|
||||
* @param breakType The type of break desired, or -1.
|
||||
* @param foundBreaks An allocated C array of the breaks found, if any
|
||||
* @return The number of breaks found.
|
||||
*/
|
||||
virtual int32_t findBreaks( UText *text,
|
||||
int32_t startPos,
|
||||
int32_t endPos,
|
||||
UBool reverse,
|
||||
int32_t breakType,
|
||||
UStack &foundBreaks ) const = 0;
|
||||
|
||||
};
|
||||
|
||||
/*******************************************************************
|
||||
* LanguageBreakFactory
|
||||
*/
|
||||
|
||||
/**
|
||||
* <p>LanguageBreakFactorys find and return a LanguageBreakEngine
|
||||
* that can determine breaks for characters in a specific set, if
|
||||
* such an object can be found.</p>
|
||||
*
|
||||
* <p>If a LanguageBreakFactory is to be shared between threads,
|
||||
* appropriate synchronization must be used; there is none internal
|
||||
* to the factory.</p>
|
||||
*
|
||||
* <p>A LanguageBreakEngine returned by a LanguageBreakFactory can
|
||||
* normally be shared between threads without synchronization, unless
|
||||
* the specific subclass of LanguageBreakFactory indicates otherwise.</p>
|
||||
*
|
||||
* <p>A LanguageBreakFactory is responsible for deleting any LanguageBreakEngine
|
||||
* it returns when it itself is deleted, unless the specific subclass of
|
||||
* LanguageBreakFactory indicates otherwise. Naturally, the factory should
|
||||
* not be deleted until the LanguageBreakEngines it has returned are no
|
||||
* longer needed.</p>
|
||||
*/
|
||||
class LanguageBreakFactory : public UMemory {
|
||||
public:
|
||||
|
||||
/**
|
||||
* <p>Default constructor.</p>
|
||||
*
|
||||
*/
|
||||
LanguageBreakFactory();
|
||||
|
||||
/**
|
||||
* <p>Virtual destructor.</p>
|
||||
*/
|
||||
virtual ~LanguageBreakFactory();
|
||||
|
||||
/**
|
||||
* <p>Find and return a LanguageBreakEngine that can find the desired
|
||||
* kind of break for the set of characters to which the supplied
|
||||
* character belongs. It is up to the set of available engines to
|
||||
* determine what the sets of characters are.</p>
|
||||
*
|
||||
* @param c A character that begins a run for which a LanguageBreakEngine is
|
||||
* sought.
|
||||
* @param breakType The kind of text break for which a LanguageBreakEngine is
|
||||
* sought.
|
||||
* @return A LanguageBreakEngine with the desired characteristics, or 0.
|
||||
*/
|
||||
virtual const LanguageBreakEngine *getEngineFor(UChar32 c, int32_t breakType) = 0;
|
||||
|
||||
};
|
||||
|
||||
/*******************************************************************
|
||||
* UnhandledEngine
|
||||
*/
|
||||
|
||||
/**
|
||||
* <p>UnhandledEngine is a special subclass of LanguageBreakEngine that
|
||||
* handles characters that no other LanguageBreakEngine is available to
|
||||
* handle. It is told the character and the type of break; at its
|
||||
* discretion it may handle more than the specified character (e.g.,
|
||||
* the entire script to which that character belongs.</p>
|
||||
*
|
||||
* <p>UnhandledEngines may not be shared between threads without
|
||||
* external synchronization.</p>
|
||||
*/
|
||||
|
||||
class UnhandledEngine : public LanguageBreakEngine {
|
||||
private:
|
||||
|
||||
/**
|
||||
* The sets of characters handled, for each break type
|
||||
* @internal
|
||||
*/
|
||||
|
||||
UnicodeSet *fHandled[4];
|
||||
|
||||
public:
|
||||
|
||||
/**
|
||||
* <p>Default constructor.</p>
|
||||
*
|
||||
*/
|
||||
UnhandledEngine(UErrorCode &status);
|
||||
|
||||
/**
|
||||
* <p>Virtual destructor.</p>
|
||||
*/
|
||||
virtual ~UnhandledEngine();
|
||||
|
||||
/**
|
||||
* <p>Indicate whether this engine handles a particular character for
|
||||
* a particular kind of break.</p>
|
||||
*
|
||||
* @param c A character which begins a run that the engine might handle
|
||||
* @param breakType The type of text break which the caller wants to determine
|
||||
* @return TRUE if this engine handles the particular character and break
|
||||
* type.
|
||||
*/
|
||||
virtual UBool handles(UChar32 c, int32_t breakType) const;
|
||||
|
||||
/**
|
||||
* <p>Find any breaks within a run in the supplied text.</p>
|
||||
*
|
||||
* @param text A UText representing the text (TODO: UText). The
|
||||
* iterator is left at the end of the run of characters which the engine
|
||||
* is capable of handling.
|
||||
* @param startPos The start of the run within the supplied text.
|
||||
* @param endPos The end of the run within the supplied text.
|
||||
* @param reverse Whether the caller is looking for breaks in a reverse
|
||||
* direction.
|
||||
* @param breakType The type of break desired, or -1.
|
||||
* @param foundBreaks An allocated C array of the breaks found, if any
|
||||
* @return The number of breaks found.
|
||||
*/
|
||||
virtual int32_t findBreaks( UText *text,
|
||||
int32_t startPos,
|
||||
int32_t endPos,
|
||||
UBool reverse,
|
||||
int32_t breakType,
|
||||
UStack &foundBreaks ) const;
|
||||
|
||||
/**
|
||||
* <p>Tell the engine to handle a particular character and break type.</p>
|
||||
*
|
||||
* @param c A character which the engine should handle
|
||||
* @param breakType The type of text break for which the engine should handle c
|
||||
*/
|
||||
virtual void handleCharacter(UChar32 c, int32_t breakType);
|
||||
|
||||
};
|
||||
|
||||
/*******************************************************************
|
||||
* ICULanguageBreakFactory
|
||||
*/
|
||||
|
||||
/**
|
||||
* <p>ICULanguageBreakFactory is the default LanguageBreakFactory for
|
||||
* ICU. It creates dictionary-based LanguageBreakEngines from dictionary
|
||||
* data in the ICU data file.</p>
|
||||
*/
|
||||
class ICULanguageBreakFactory : public LanguageBreakFactory {
|
||||
private:
|
||||
|
||||
/**
|
||||
* The stack of break engines created by this factory
|
||||
* @internal
|
||||
*/
|
||||
|
||||
UStack *fEngines;
|
||||
|
||||
public:
|
||||
|
||||
/**
|
||||
* <p>Standard constructor.</p>
|
||||
*
|
||||
*/
|
||||
ICULanguageBreakFactory(UErrorCode &status);
|
||||
|
||||
/**
|
||||
* <p>Virtual destructor.</p>
|
||||
*/
|
||||
virtual ~ICULanguageBreakFactory();
|
||||
|
||||
/**
|
||||
* <p>Find and return a LanguageBreakEngine that can find the desired
|
||||
* kind of break for the set of characters to which the supplied
|
||||
* character belongs. It is up to the set of available engines to
|
||||
* determine what the sets of characters are.</p>
|
||||
*
|
||||
* @param c A character that begins a run for which a LanguageBreakEngine is
|
||||
* sought.
|
||||
* @param breakType The kind of text break for which a LanguageBreakEngine is
|
||||
* sought.
|
||||
* @return A LanguageBreakEngine with the desired characteristics, or 0.
|
||||
*/
|
||||
virtual const LanguageBreakEngine *getEngineFor(UChar32 c, int32_t breakType);
|
||||
|
||||
protected:
|
||||
/**
|
||||
* <p>Create a LanguageBreakEngine for the set of characters to which
|
||||
* the supplied character belongs, for the specified break type.</p>
|
||||
*
|
||||
* @param c A character that begins a run for which a LanguageBreakEngine is
|
||||
* sought.
|
||||
* @param breakType The kind of text break for which a LanguageBreakEngine is
|
||||
* sought.
|
||||
* @return A LanguageBreakEngine with the desired characteristics, or 0.
|
||||
*/
|
||||
virtual const LanguageBreakEngine *loadEngineFor(UChar32 c, int32_t breakType);
|
||||
|
||||
/**
|
||||
* <p>Create a DictionaryMatcher for the specified script and break type.</p>
|
||||
* @param script An ISO 15924 script code that identifies the dictionary to be
|
||||
* created.
|
||||
* @param breakType The kind of text break for which a dictionary is
|
||||
* sought.
|
||||
* @return A DictionaryMatcher with the desired characteristics, or NULL.
|
||||
*/
|
||||
virtual DictionaryMatcher *loadDictionaryMatcherFor(UScriptCode script, int32_t breakType);
|
||||
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
/* BRKENG_H */
|
||||
#endif
|
||||
492
UnicodeConverter/icubuilds-mac/icu/icu/common/brkiter.cpp
Normal file
492
UnicodeConverter/icubuilds-mac/icu/icu/common/brkiter.cpp
Normal file
@ -0,0 +1,492 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1997-2015, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
*
|
||||
* File brkiter.cpp
|
||||
*
|
||||
* Modification History:
|
||||
*
|
||||
* Date Name Description
|
||||
* 02/18/97 aliu Converted from OpenClass. Added DONE.
|
||||
* 01/13/2000 helena Added UErrorCode parameter to createXXXInstance methods.
|
||||
*****************************************************************************************
|
||||
*/
|
||||
|
||||
// *****************************************************************************
|
||||
// This file was generated from the java source file BreakIterator.java
|
||||
// *****************************************************************************
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
#include "unicode/rbbi.h"
|
||||
#include "unicode/brkiter.h"
|
||||
#include "unicode/udata.h"
|
||||
#include "unicode/ures.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "unicode/filteredbrk.h"
|
||||
#include "ucln_cmn.h"
|
||||
#include "cstring.h"
|
||||
#include "umutex.h"
|
||||
#include "servloc.h"
|
||||
#include "locbased.h"
|
||||
#include "uresimp.h"
|
||||
#include "uassert.h"
|
||||
#include "ubrkimpl.h"
|
||||
#include "charstr.h"
|
||||
|
||||
// *****************************************************************************
|
||||
// class BreakIterator
|
||||
// This class implements methods for finding the location of boundaries in text.
|
||||
// Instances of BreakIterator maintain a current position and scan over text
|
||||
// returning the index of characters where boundaries occur.
|
||||
// *****************************************************************************
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
// -------------------------------------
|
||||
|
||||
// Builds a RuleBasedBreakIterator for a locale from the break-iterator
// resource data.
//
// The locale's "boundaries" table is consulted for the entry named by `type`;
// that entry holds the rules file name ("name.ext"), which is split at its
// first dot and opened with udata_open().
//
// @param loc    Locale whose break rules are wanted.
// @param type   Key inside the "boundaries" table naming the rule set.
// @param kind   Break type recorded on the resulting iterator.
// @param status In/out error code. Nothing is done if it already indicates
//               failure. Set to U_BUFFER_OVERFLOW_ERROR if the file name or
//               its extension does not fit the local buffers, and to
//               U_MEMORY_ALLOCATION_ERROR if the iterator cannot be allocated.
// @return The new iterator, or NULL on any failure.
BreakIterator*
BreakIterator::buildInstance(const Locale& loc, const char *type, int32_t kind, UErrorCode &status)
{
    char fnbuff[256];
    char ext[4]={'\0'};
    CharString actualLocale;
    // Initialized because ures_getString() may return without writing the
    // length when status already indicates failure; `size` is read right after.
    int32_t size = 0;
    const UChar* brkfname = NULL;
    UResourceBundle brkRulesStack;
    UResourceBundle brkNameStack;
    UResourceBundle *brkRules = &brkRulesStack;
    UResourceBundle *brkName  = &brkNameStack;
    RuleBasedBreakIterator *result = NULL;

    if (U_FAILURE(status))
        return NULL;

    ures_initStackObject(brkRules);
    ures_initStackObject(brkName);

    // Get the locale
    UResourceBundle *b = ures_openNoDefault(U_ICUDATA_BRKITR, loc.getName(), &status);

    // Get the "boundaries" array.
    if (U_SUCCESS(status)) {
        brkRules = ures_getByKeyWithFallback(b, "boundaries", brkRules, &status);
        // Get the string object naming the rules file
        brkName = ures_getByKeyWithFallback(brkRules, type, brkName, &status);
        // Get the actual string
        brkfname = ures_getString(brkName, &size, &status);
        U_ASSERT((size_t)size<sizeof(fnbuff));
        if ((size_t)size>=sizeof(fnbuff)) {
            size=0;
            if (U_SUCCESS(status)) {
                status = U_BUFFER_OVERFLOW_ERROR;
            }
        }

        // Use the string if we found it
        if (U_SUCCESS(status) && brkfname) {
            actualLocale.append(ures_getLocaleInternal(brkName, &status), -1, status);

            UChar* extStart=u_strchr(brkfname, 0x002e);
            int len = 0;
            if(extStart!=NULL){
                len = (int)(extStart-brkfname);
                // Copy only as many extension characters as exist and
                // terminate explicitly. Converting a fixed sizeof(ext) units
                // left ext unterminated for extensions longer than three
                // characters and read past the source NUL for shorter ones.
                const int32_t extLen = u_strlen(extStart+1);
                if (extLen < (int32_t)sizeof(ext)) {
                    u_UCharsToChars(extStart+1, ext, extLen);
                    ext[extLen]=0;
                } else if (U_SUCCESS(status)) {
                    status = U_BUFFER_OVERFLOW_ERROR;
                }
                u_UCharsToChars(brkfname, fnbuff, len);
            }
            // NOTE(review): when the name contains no dot, len stays 0 and the
            // file name handed to udata_open() is empty - confirm this is intended.
            fnbuff[len]=0; // nul terminate
        }
    }

    ures_close(brkRules);
    ures_close(brkName);

    UDataMemory* file = udata_open(U_ICUDATA_BRKITR, ext, fnbuff, &status);
    if (U_FAILURE(status)) {
        ures_close(b);
        return NULL;
    }

    // Create a RuleBasedBreakIterator
    result = new RuleBasedBreakIterator(file, status);

    // If there is a result, set the valid locale and actual locale, and the kind
    if (U_SUCCESS(status) && result != NULL) {
        U_LOCALE_BASED(locBased, *(BreakIterator*)result);
        locBased.setLocaleIDs(ures_getLocaleByType(b, ULOC_VALID_LOCALE, &status),
                              actualLocale.data());
        result->setBreakType(kind);
    }

    ures_close(b);

    if (U_FAILURE(status) && result != NULL) {  // Sometimes redundant check, but simple
        delete result;
        return NULL;
    }

    if (result == NULL) {
        // Allocation failed, so nothing took over the data file.
        udata_close(file);
        if (U_SUCCESS(status)) {
            status = U_MEMORY_ALLOCATION_ERROR;
        }
    }

    return result;
}
|
||||
|
||||
// Creates a break iterator for word breaks.
|
||||
BreakIterator* U_EXPORT2
|
||||
BreakIterator::createWordInstance(const Locale& key, UErrorCode& status)
|
||||
{
|
||||
return createInstance(key, UBRK_WORD, status);
|
||||
}
|
||||
|
||||
// -------------------------------------
|
||||
|
||||
// Creates a break iterator for line breaks.
|
||||
BreakIterator* U_EXPORT2
|
||||
BreakIterator::createLineInstance(const Locale& key, UErrorCode& status)
|
||||
{
|
||||
return createInstance(key, UBRK_LINE, status);
|
||||
}
|
||||
|
||||
// -------------------------------------
|
||||
|
||||
// Creates a break iterator for character breaks.
|
||||
BreakIterator* U_EXPORT2
|
||||
BreakIterator::createCharacterInstance(const Locale& key, UErrorCode& status)
|
||||
{
|
||||
return createInstance(key, UBRK_CHARACTER, status);
|
||||
}
|
||||
|
||||
// -------------------------------------
|
||||
|
||||
// Creates a break iterator for sentence breaks.
|
||||
BreakIterator* U_EXPORT2
|
||||
BreakIterator::createSentenceInstance(const Locale& key, UErrorCode& status)
|
||||
{
|
||||
return createInstance(key, UBRK_SENTENCE, status);
|
||||
}
|
||||
|
||||
// -------------------------------------
|
||||
|
||||
// Creates a break iterator for title casing breaks.
|
||||
BreakIterator* U_EXPORT2
|
||||
BreakIterator::createTitleInstance(const Locale& key, UErrorCode& status)
|
||||
{
|
||||
return createInstance(key, UBRK_TITLE, status);
|
||||
}
|
||||
|
||||
// -------------------------------------
|
||||
|
||||
// Gets all the available locales that has localized text boundary data.
|
||||
const Locale* U_EXPORT2
|
||||
BreakIterator::getAvailableLocales(int32_t& count)
|
||||
{
|
||||
return Locale::getAvailableLocales(count);
|
||||
}
|
||||
|
||||
// ------------------------------------------
|
||||
//
|
||||
// Default constructor and destructor
|
||||
//
|
||||
//-------------------------------------------
|
||||
|
||||
// Starts with both locale ID buffers holding the empty string.
BreakIterator::BreakIterator()
{
    actualLocale[0] = 0;
    validLocale[0] = 0;
}

// Nothing to release at this level.
BreakIterator::~BreakIterator()
{
}
|
||||
|
||||
// ------------------------------------------
|
||||
//
|
||||
// Registration
|
||||
//
|
||||
//-------------------------------------------
|
||||
#if !UCONFIG_NO_SERVICE
|
||||
|
||||
// -------------------------------------
|
||||
|
||||
class ICUBreakIteratorFactory : public ICUResourceBundleFactory {
|
||||
public:
|
||||
virtual ~ICUBreakIteratorFactory();
|
||||
protected:
|
||||
virtual UObject* handleCreate(const Locale& loc, int32_t kind, const ICUService* /*service*/, UErrorCode& status) const {
|
||||
return BreakIterator::makeInstance(loc, kind, status);
|
||||
}
|
||||
};
|
||||
|
||||
ICUBreakIteratorFactory::~ICUBreakIteratorFactory() {}
|
||||
|
||||
// -------------------------------------
|
||||
|
||||
class ICUBreakIteratorService : public ICULocaleService {
|
||||
public:
|
||||
ICUBreakIteratorService()
|
||||
: ICULocaleService(UNICODE_STRING("Break Iterator", 14))
|
||||
{
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
registerFactory(new ICUBreakIteratorFactory(), status);
|
||||
}
|
||||
|
||||
virtual ~ICUBreakIteratorService();
|
||||
|
||||
virtual UObject* cloneInstance(UObject* instance) const {
|
||||
return ((BreakIterator*)instance)->clone();
|
||||
}
|
||||
|
||||
virtual UObject* handleDefault(const ICUServiceKey& key, UnicodeString* /*actualID*/, UErrorCode& status) const {
|
||||
LocaleKey& lkey = (LocaleKey&)key;
|
||||
int32_t kind = lkey.kind();
|
||||
Locale loc;
|
||||
lkey.currentLocale(loc);
|
||||
return BreakIterator::makeInstance(loc, kind, status);
|
||||
}
|
||||
|
||||
virtual UBool isDefault() const {
|
||||
return countFactories() == 1;
|
||||
}
|
||||
};
|
||||
|
||||
// Out-of-line destructor definition; no extra cleanup is performed.
ICUBreakIteratorService::~ICUBreakIteratorService()
{
}
|
||||
|
||||
// -------------------------------------
|
||||
|
||||
// defined in ucln_cmn.h
|
||||
U_NAMESPACE_END
|
||||
|
||||
static icu::UInitOnce gInitOnce;
|
||||
static icu::ICULocaleService* gService = NULL;
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Release all static memory held by breakiterator.
|
||||
*/
|
||||
U_CDECL_BEGIN
|
||||
static UBool U_CALLCONV breakiterator_cleanup(void) {
|
||||
#if !UCONFIG_NO_SERVICE
|
||||
if (gService) {
|
||||
delete gService;
|
||||
gService = NULL;
|
||||
}
|
||||
gInitOnce.reset();
|
||||
#endif
|
||||
return TRUE;
|
||||
}
|
||||
U_CDECL_END
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
static void U_CALLCONV
|
||||
initService(void) {
|
||||
gService = new ICUBreakIteratorService();
|
||||
ucln_common_registerCleanup(UCLN_COMMON_BREAKITERATOR, breakiterator_cleanup);
|
||||
}
|
||||
|
||||
// Returns the service singleton, running initService() through
// umtx_initOnce() first. May return NULL if creation did not succeed.
static ICULocaleService*
getService(void)
{
    umtx_initOnce(gInitOnce, &initService);
    ICULocaleService* const service = gService;
    return service;
}
|
||||
|
||||
|
||||
// -------------------------------------
|
||||
|
||||
// Reports whether the service already exists. If the init-once guard is
// still in its reset state this returns FALSE without creating the service.
static inline UBool
hasService(void)
{
    if (gInitOnce.isReset()) {
        return FALSE;
    }
    return getService() != NULL;
}
|
||||
|
||||
// -------------------------------------
|
||||
|
||||
// Registers `toAdopt` with the break iterator service for the given locale
// and kind. If the service cannot be obtained, status is set to
// U_MEMORY_ALLOCATION_ERROR and NULL is returned.
URegistryKey U_EXPORT2
BreakIterator::registerInstance(BreakIterator* toAdopt, const Locale& locale, UBreakIteratorType kind, UErrorCode& status)
{
    ICULocaleService* const registry = getService();
    if (registry != NULL) {
        return registry->registerInstance(toAdopt, locale, kind, status);
    }
    status = U_MEMORY_ALLOCATION_ERROR;
    return NULL;
}
|
||||
|
||||
// -------------------------------------
|
||||
|
||||
// Removes a previous registration identified by `key`.
// Returns FALSE if status already indicates failure, or if no service
// exists (in which case status becomes U_MEMORY_ALLOCATION_ERROR).
UBool U_EXPORT2
BreakIterator::unregister(URegistryKey key, UErrorCode& status)
{
    if (U_FAILURE(status)) {
        return FALSE;
    }
    if (!hasService()) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return FALSE;
    }
    return gService->unregister(key, status);
}
|
||||
|
||||
// -------------------------------------
|
||||
|
||||
// Returns the service's enumeration of available locales, or NULL when the
// service could not be obtained.
StringEnumeration* U_EXPORT2
BreakIterator::getAvailableLocales(void)
{
    ICULocaleService* const registry = getService();
    if (registry != NULL) {
        return registry->getAvailableLocales();
    }
    return NULL;
}
|
||||
#endif /* UCONFIG_NO_SERVICE */
|
||||
|
||||
// -------------------------------------
|
||||
|
||||
// Creates a break iterator of the requested kind for `loc`.
// When a service already exists (and services are compiled in) the lookup
// goes through it; otherwise makeInstance() is called directly.
BreakIterator*
BreakIterator::createInstance(const Locale& loc, int32_t kind, UErrorCode& status)
{
    if (U_FAILURE(status)) {
        return NULL;
    }

#if !UCONFIG_NO_SERVICE
    if (!hasService()) {
        return makeInstance(loc, kind, status);
    }

    Locale actualLoc("");
    BreakIterator *fromService = (BreakIterator*)gService->get(loc, kind, &actualLoc, status);
    // TODO (carried over from the ICU 2.8 service code): actualLoc is only
    // populated when a really registered break iterator handled the request.
    // If the handleDefault path was taken, actualLoc comes back empty and the
    // returned object already had its actual/valid locale data filled in by
    // makeInstance(), so it must not be touched here. This should be
    // revisited and cleaned up.
    if (U_SUCCESS(status) && fromService != NULL && *actualLoc.getName() != 0) {
        U_LOCALE_BASED(localeIDs, *fromService);
        localeIDs.setLocaleIDs(actualLoc.getName(), actualLoc.getName());
    }
    return fromService;
#else
    return makeInstance(loc, kind, status);
#endif
}
|
||||
|
||||
// -------------------------------------
|
||||
enum { kKeyValueLenMax = 32 };
|
||||
|
||||
// Builds a break iterator of the given kind directly (no service lookup).
// - UBRK_LINE honours the locale keyword "lb" (strict/normal/loose) by
//   appending "_<value>" to the "line" rule type.
// - UBRK_SENTENCE honours "ss=standard" by wrapping the result with a
//   FilteredBreakIteratorBuilder.
// Unknown kinds set U_ILLEGAL_ARGUMENT_ERROR. Returns NULL on any failure.
BreakIterator*
BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status)
{
    if (U_FAILURE(status)) {
        return NULL;
    }

    BreakIterator *built = NULL;
    switch (kind) {
    case UBRK_CHARACTER:
        built = BreakIterator::buildInstance(loc, "grapheme", kind, status);
        break;

    case UBRK_WORD:
        built = BreakIterator::buildInstance(loc, "word", kind, status);
        break;

    case UBRK_LINE: {
        char ruleType[kKeyValueLenMax];
        uprv_strcpy(ruleType, "line");

        char lineBreakStyle[kKeyValueLenMax] = {0};
        UErrorCode keywordStatus = U_ZERO_ERROR;
        const int32_t styleLength = loc.getKeywordValue("lb", lineBreakStyle, kKeyValueLenMax, keywordStatus);
        if (U_SUCCESS(keywordStatus) && styleLength > 0) {
            // Only the three known styles select a dedicated rule set.
            const UBool knownStyle =
                uprv_strcmp(lineBreakStyle, "strict") == 0 ||
                uprv_strcmp(lineBreakStyle, "normal") == 0 ||
                uprv_strcmp(lineBreakStyle, "loose") == 0;
            if (knownStyle) {
                uprv_strcat(ruleType, "_");
                uprv_strcat(ruleType, lineBreakStyle);
            }
        }
        built = BreakIterator::buildInstance(loc, ruleType, kind, status);
        break;
    }

    case UBRK_SENTENCE: {
        built = BreakIterator::buildInstance(loc, "sentence", kind, status);

        char suppressionStyle[kKeyValueLenMax] = {0};
        UErrorCode keywordStatus = U_ZERO_ERROR;
        const int32_t styleLength = loc.getKeywordValue("ss", suppressionStyle, kKeyValueLenMax, keywordStatus);
        if (U_SUCCESS(keywordStatus) && styleLength > 0 && uprv_strcmp(suppressionStyle, "standard") == 0) {
            FilteredBreakIteratorBuilder *filterBuilder = FilteredBreakIteratorBuilder::createInstance(loc, keywordStatus);
            if (U_SUCCESS(keywordStatus)) {
                // build() receives the plain sentence iterator and returns the wrapper.
                built = filterBuilder->build(built, status);
                delete filterBuilder;
            }
        }
        break;
    }

    case UBRK_TITLE:
        built = BreakIterator::buildInstance(loc, "title", kind, status);
        break;

    default:
        status = U_ILLEGAL_ARGUMENT_ERROR;
        break;
    }

    if (U_FAILURE(status)) {
        return NULL;
    }
    return built;
}
|
||||
|
||||
// Returns the locale of the requested type (valid or actual) as a Locale
// object, using the LocaleBased helper over this object's ID buffers.
Locale
BreakIterator::getLocale(ULocDataLocaleType type, UErrorCode& status) const {
    U_LOCALE_BASED(localeIDs, *this);
    const Locale found = localeIDs.getLocale(type, status);
    return found;
}
|
||||
|
||||
const char *
|
||||
BreakIterator::getLocaleID(ULocDataLocaleType type, UErrorCode& status) const {
|
||||
U_LOCALE_BASED(locBased, *this);
|
||||
return locBased.getLocaleID(type, status);
|
||||
}
|
||||
|
||||
|
||||
// This implementation of getRuleStatus is a do-nothing stub, here to
|
||||
// provide a default implementation for any derived BreakIterator classes that
|
||||
// do not implement it themselves.
|
||||
int32_t BreakIterator::getRuleStatus() const {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// This implementation of getRuleStatusVec is a do-nothing stub, here to
|
||||
// provide a default implementation for any derived BreakIterator classes that
|
||||
// do not implement it themselves.
|
||||
int32_t BreakIterator::getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status) {
|
||||
if (U_FAILURE(status)) {
|
||||
return 0;
|
||||
}
|
||||
if (capacity < 1) {
|
||||
status = U_BUFFER_OVERFLOW_ERROR;
|
||||
return 1;
|
||||
}
|
||||
*fillInVec = 0;
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Constructs a break iterator whose valid/actual locale IDs are taken from
// the two locales given.
BreakIterator::BreakIterator (const Locale& valid, const Locale& actual) {
    U_LOCALE_BASED(localeIDs, (*this));
    localeIDs.setLocaleIDs(valid, actual);
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
|
||||
|
||||
//eof
|
||||
77
UnicodeConverter/icubuilds-mac/icu/icu/common/bytestream.cpp
Normal file
77
UnicodeConverter/icubuilds-mac/icu/icu/common/bytestream.cpp
Normal file
@ -0,0 +1,77 @@
|
||||
// Copyright (C) 2009-2011, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//
|
||||
// Copyright 2007 Google Inc. All Rights Reserved.
|
||||
// Author: sanjay@google.com (Sanjay Ghemawat)
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/bytestream.h"
|
||||
#include "cmemory.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
// Out-of-line destructor definition; nothing to release.
ByteSink::~ByteSink()
{
}
|
||||
|
||||
// Default implementation: always offers the caller's scratch buffer.
// Returns NULL with *result_capacity == 0 when min_capacity < 1 or the
// scratch buffer is smaller than min_capacity. The capacity hint is ignored.
char* ByteSink::GetAppendBuffer(int32_t min_capacity,
                                int32_t /*desired_capacity_hint*/,
                                char* scratch, int32_t scratch_capacity,
                                int32_t* result_capacity) {
    const UBool scratchUsable = min_capacity >= 1 && scratch_capacity >= min_capacity;
    if (scratchUsable) {
        *result_capacity = scratch_capacity;
        return scratch;
    }
    *result_capacity = 0;
    return NULL;
}
|
||||
|
||||
// Default Flush(): does nothing.
void ByteSink::Flush()
{
}
|
||||
|
||||
// Wraps a caller-provided output array. A negative capacity is treated as 0;
// all counters start at zero and the overflow flag starts cleared.
CheckedArrayByteSink::CheckedArrayByteSink(char* outbuf, int32_t capacity)
    : outbuf_(outbuf),
      capacity_(capacity > 0 ? capacity : 0),
      size_(0),
      appended_(0),
      overflowed_(FALSE) {
}
|
||||
|
||||
// Out-of-line destructor definition; nothing to release.
CheckedArrayByteSink::~CheckedArrayByteSink()
{
}
|
||||
|
||||
// Returns the sink to its initial state: zero bytes written, zero bytes
// appended, overflow flag cleared. Returns *this for chaining.
CheckedArrayByteSink& CheckedArrayByteSink::Reset() {
    appended_ = 0;
    size_ = 0;
    overflowed_ = FALSE;
    return *this;
}
|
||||
|
||||
void CheckedArrayByteSink::Append(const char* bytes, int32_t n) {
|
||||
if (n <= 0) {
|
||||
return;
|
||||
}
|
||||
appended_ += n;
|
||||
int32_t available = capacity_ - size_;
|
||||
if (n > available) {
|
||||
n = available;
|
||||
overflowed_ = TRUE;
|
||||
}
|
||||
if (n > 0 && bytes != (outbuf_ + size_)) {
|
||||
uprv_memcpy(outbuf_ + size_, bytes, n);
|
||||
}
|
||||
size_ += n;
|
||||
}
|
||||
|
||||
// Hands out a writable buffer of at least min_capacity bytes.
// Returns NULL with *result_capacity == 0 when min_capacity < 1 or the
// scratch buffer is smaller than min_capacity. Otherwise the unused tail of
// the output array is offered if it is large enough; if not, the caller's
// scratch buffer is offered. The capacity hint is ignored.
char* CheckedArrayByteSink::GetAppendBuffer(int32_t min_capacity,
                                            int32_t /*desired_capacity_hint*/,
                                            char* scratch,
                                            int32_t scratch_capacity,
                                            int32_t* result_capacity) {
    if (min_capacity < 1 || scratch_capacity < min_capacity) {
        *result_capacity = 0;
        return NULL;
    }
    const int32_t remaining = capacity_ - size_;
    if (remaining < min_capacity) {
        *result_capacity = scratch_capacity;
        return scratch;
    }
    *result_capacity = remaining;
    return outbuf_ + size_;
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
439
UnicodeConverter/icubuilds-mac/icu/icu/common/bytestrie.cpp
Normal file
439
UnicodeConverter/icubuilds-mac/icu/icu/common/bytestrie.cpp
Normal file
@ -0,0 +1,439 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2010-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
* file name: bytestrie.cpp
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2010sep25
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/bytestream.h"
|
||||
#include "unicode/bytestrie.h"
|
||||
#include "unicode/uobject.h"
|
||||
#include "cmemory.h"
|
||||
#include "uassert.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
// Frees the owned byte array through uprv_free().
BytesTrie::~BytesTrie()
{
    uprv_free(ownedArray_);
}
|
||||
|
||||
// lead byte already shifted right by 1.
|
||||
int32_t
|
||||
BytesTrie::readValue(const uint8_t *pos, int32_t leadByte) {
|
||||
int32_t value;
|
||||
if(leadByte<kMinTwoByteValueLead) {
|
||||
value=leadByte-kMinOneByteValueLead;
|
||||
} else if(leadByte<kMinThreeByteValueLead) {
|
||||
value=((leadByte-kMinTwoByteValueLead)<<8)|*pos;
|
||||
} else if(leadByte<kFourByteValueLead) {
|
||||
value=((leadByte-kMinThreeByteValueLead)<<16)|(pos[0]<<8)|pos[1];
|
||||
} else if(leadByte==kFourByteValueLead) {
|
||||
value=(pos[0]<<16)|(pos[1]<<8)|pos[2];
|
||||
} else {
|
||||
value=(pos[0]<<24)|(pos[1]<<16)|(pos[2]<<8)|pos[3];
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
// Reads a variable-length jump delta at pos and returns the position that
// lies `delta` bytes past the end of the encoded delta.
const uint8_t *
BytesTrie::jumpByDelta(const uint8_t *pos) {
    const int32_t lead=*pos++;
    int32_t offset;
    if(lead<kMinTwoByteDeltaLead) {
        // The lead byte is the whole delta.
        offset=lead;
    } else if(lead<kMinThreeByteDeltaLead) {
        offset=((lead-kMinTwoByteDeltaLead)<<8)|pos[0];
        pos+=1;
    } else if(lead<kFourByteDeltaLead) {
        offset=((lead-kMinThreeByteDeltaLead)<<16)|(pos[0]<<8)|pos[1];
        pos+=2;
    } else if(lead==kFourByteDeltaLead) {
        offset=(pos[0]<<16)|(pos[1]<<8)|pos[2];
        pos+=3;
    } else {
        offset=(pos[0]<<24)|(pos[1]<<16)|(pos[2]<<8)|pos[3];
        pos+=4;
    }
    return pos+offset;
}
|
||||
|
||||
// Reports the match state at the current position without consuming input:
// NO_MATCH when the trie is stopped, NO_VALUE while inside a linear-match
// node or when the next node is not a value node, otherwise the value result.
UStringTrieResult
BytesTrie::current() const {
    const uint8_t *here=pos_;
    if(here==NULL) {
        return USTRINGTRIE_NO_MATCH;
    }
    if(remainingMatchLength_>=0) {
        // Still in the middle of a linear-match node.
        return USTRINGTRIE_NO_VALUE;
    }
    const int32_t lead=*here;
    if(lead>=kMinValueLead) {
        return valueResult(lead);
    }
    return USTRINGTRIE_NO_VALUE;
}
|
||||
|
||||
// Selects the branch edge for inByte.
// pos points just past the branch lead byte; length is the lead byte value
// (0 means the real length is stored in the next byte).
// On a match pos_ is updated and the result for the new position is returned;
// on a mismatch the trie is stopped and NO_MATCH is returned.
UStringTrieResult
BytesTrie::branchNext(const uint8_t *pos, int32_t length, int32_t inByte) {
    if(length==0) {
        length=*pos++;
    }
    ++length;
    // length is now the number of bytes to select from.
    // Large branches are encoded as a binary search.
    while(length>kMaxBranchLinearSubNodeLength) {
        const int32_t pivot=*pos++;
        if(inByte<pivot) {
            length>>=1;
            pos=jumpByDelta(pos);
        } else {
            length-=length>>1;
            pos=skipDelta(pos);
        }
    }
    // Linear search over the last few bytes.
    // length>=2 here because the loop above only halves lengths greater than
    // kMaxBranchLinearSubNodeLength, which is at least 3.
    do {
        const int32_t candidate=*pos++;
        if(inByte!=candidate) {
            --length;
            pos=skipValue(pos);
            continue;
        }
        int32_t lead=*pos;
        U_ASSERT(lead>=kMinValueLead);
        UStringTrieResult outcome;
        if(lead&kValueIsFinal) {
            // Leave the final value in place for getValue() to read.
            outcome=USTRINGTRIE_FINAL_VALUE;
        } else {
            // A non-final value is the jump delta to the next node.
            // This is readValue(pos, lead>>1) written out inline, advancing
            // pos past the value at the same time.
            ++pos;
            lead>>=1;
            int32_t jump;
            if(lead<kMinTwoByteValueLead) {
                jump=lead-kMinOneByteValueLead;
            } else if(lead<kMinThreeByteValueLead) {
                jump=((lead-kMinTwoByteValueLead)<<8)|*pos++;
            } else if(lead<kFourByteValueLead) {
                jump=((lead-kMinThreeByteValueLead)<<16)|(pos[0]<<8)|pos[1];
                pos+=2;
            } else if(lead==kFourByteValueLead) {
                jump=(pos[0]<<16)|(pos[1]<<8)|pos[2];
                pos+=3;
            } else {
                jump=(pos[0]<<24)|(pos[1]<<16)|(pos[2]<<8)|pos[3];
                pos+=4;
            }
            pos+=jump;
            const int32_t target=*pos;
            if(target>=kMinValueLead) {
                outcome=valueResult(target);
            } else {
                outcome=USTRINGTRIE_NO_VALUE;
            }
        }
        pos_=pos;
        return outcome;
    } while(length>1);
    // Last candidate: it has no value/delta of its own to skip.
    const int32_t lastCandidate=*pos++;
    if(inByte!=lastCandidate) {
        stop();
        return USTRINGTRIE_NO_MATCH;
    }
    pos_=pos;
    const int32_t following=*pos;
    if(following>=kMinValueLead) {
        return valueResult(following);
    }
    return USTRINGTRIE_NO_VALUE;
}
|
||||
|
||||
// Matches inByte starting at a node boundary (pos points at a node lead).
// Intermediate values are skipped; branch nodes are delegated to
// branchNext(). On mismatch the trie is stopped and NO_MATCH is returned.
UStringTrieResult
BytesTrie::nextImpl(const uint8_t *pos, int32_t inByte) {
    for(;;) {
        const int32_t lead=*pos++;
        if(lead<kMinLinearMatch) {
            return branchNext(pos, lead, inByte);
        }
        if(lead>=kMinValueLead) {
            if(lead&kValueIsFinal) {
                // Nothing can follow a final value.
                break;
            }
            // Step over an intermediate value.
            pos=skipValue(pos, lead);
            // The next node must not also be a value node.
            U_ASSERT(*pos<kMinValueLead);
            continue;
        }
        // Linear-match node: compare the first of its bytes.
        if(inByte!=*pos++) {
            break;
        }
        // Actual match length minus 1, minus the byte just consumed.
        const int32_t remaining=lead-kMinLinearMatch-1;
        remainingMatchLength_=remaining;
        pos_=pos;
        if(remaining<0) {
            const int32_t following=*pos;
            if(following>=kMinValueLead) {
                return valueResult(following);
            }
        }
        return USTRINGTRIE_NO_VALUE;
    }
    stop();
    return USTRINGTRIE_NO_MATCH;
}
|
||||
|
||||
// Advances the trie by one input byte.
// Negative inByte values are mapped into 0x80..0xff by adding 0x100.
// Returns NO_MATCH if the trie is already stopped or the byte does not match.
UStringTrieResult
BytesTrie::next(int32_t inByte) {
    const uint8_t *cursor=pos_;
    if(cursor==NULL) {
        return USTRINGTRIE_NO_MATCH;
    }
    if(inByte<0) {
        inByte+=0x100;
    }
    int32_t remaining=remainingMatchLength_;  // Actual remaining match length minus 1.
    if(remaining<0) {
        // At a node boundary.
        return nextImpl(cursor, inByte);
    }
    // Inside a linear-match node: compare against its next byte.
    if(inByte!=*cursor++) {
        stop();
        return USTRINGTRIE_NO_MATCH;
    }
    --remaining;
    remainingMatchLength_=remaining;
    pos_=cursor;
    if(remaining<0) {
        const int32_t following=*cursor;
        if(following>=kMinValueLead) {
            return valueResult(following);
        }
    }
    return USTRINGTRIE_NO_VALUE;
}
|
||||
|
||||
// Advances the trie by a whole byte sequence.
//
// s/sLength: the input bytes; sLength<0 means s is NUL-terminated.
// Returns the result for the state after the last input byte, or NO_MATCH
// (and stops the trie) as soon as a byte does not match.
//
// Input bytes are read as uint8_t so that bytes 0x80..0xff compare equal to
// the trie's unsigned units even where plain char is signed. This mirrors
// the normalization of negative values in next(int32_t).
UStringTrieResult
BytesTrie::next(const char *s, int32_t sLength) {
    if(sLength<0 ? *s==0 : sLength==0) {
        // Empty input.
        return current();
    }
    const uint8_t *pos=pos_;
    if(pos==NULL) {
        return USTRINGTRIE_NO_MATCH;
    }
    int32_t length=remainingMatchLength_;  // Actual remaining match length minus 1.
    for(;;) {
        // Fetch the next input byte, if there is one.
        // Continue a linear-match node without rechecking sLength<0.
        int32_t inByte;
        if(sLength<0) {
            for(;;) {
                if((inByte=(uint8_t)*s++)==0) {
                    // End of NUL-terminated input: record the state and report it.
                    remainingMatchLength_=length;
                    pos_=pos;
                    int32_t node;
                    return (length<0 && (node=*pos)>=kMinValueLead) ?
                            valueResult(node) : USTRINGTRIE_NO_VALUE;
                }
                if(length<0) {
                    // Linear-match node finished; handle inByte at a node boundary.
                    remainingMatchLength_=length;
                    break;
                }
                if(inByte!=*pos) {
                    stop();
                    return USTRINGTRIE_NO_MATCH;
                }
                ++pos;
                --length;
            }
        } else {
            for(;;) {
                if(sLength==0) {
                    // End of counted input: record the state and report it.
                    remainingMatchLength_=length;
                    pos_=pos;
                    int32_t node;
                    return (length<0 && (node=*pos)>=kMinValueLead) ?
                            valueResult(node) : USTRINGTRIE_NO_VALUE;
                }
                inByte=(uint8_t)*s++;
                --sLength;
                if(length<0) {
                    // Linear-match node finished; handle inByte at a node boundary.
                    remainingMatchLength_=length;
                    break;
                }
                if(inByte!=*pos) {
                    stop();
                    return USTRINGTRIE_NO_MATCH;
                }
                ++pos;
                --length;
            }
        }
        for(;;) {
            int32_t node=*pos++;
            if(node<kMinLinearMatch) {
                UStringTrieResult result=branchNext(pos, node, inByte);
                if(result==USTRINGTRIE_NO_MATCH) {
                    return USTRINGTRIE_NO_MATCH;
                }
                // Fetch the next input byte, if there is one.
                if(sLength<0) {
                    if((inByte=(uint8_t)*s++)==0) {
                        return result;
                    }
                } else {
                    if(sLength==0) {
                        return result;
                    }
                    inByte=(uint8_t)*s++;
                    --sLength;
                }
                if(result==USTRINGTRIE_FINAL_VALUE) {
                    // No further matching bytes.
                    stop();
                    return USTRINGTRIE_NO_MATCH;
                }
                pos=pos_;  // branchNext() advanced pos and wrote it to pos_ .
            } else if(node<kMinValueLead) {
                // Match length+1 bytes.
                length=node-kMinLinearMatch;  // Actual match length minus 1.
                if(inByte!=*pos) {
                    stop();
                    return USTRINGTRIE_NO_MATCH;
                }
                ++pos;
                --length;
                break;
            } else if(node&kValueIsFinal) {
                // No further matching bytes.
                stop();
                return USTRINGTRIE_NO_MATCH;
            } else {
                // Skip intermediate value.
                pos=skipValue(pos, node);
                // The next node must not also be a value node.
                U_ASSERT(*pos<kMinValueLead);
            }
        }
    }
}
|
||||
|
||||
const uint8_t *
|
||||
BytesTrie::findUniqueValueFromBranch(const uint8_t *pos, int32_t length,
|
||||
UBool haveUniqueValue, int32_t &uniqueValue) {
|
||||
while(length>kMaxBranchLinearSubNodeLength) {
|
||||
++pos; // ignore the comparison byte
|
||||
if(NULL==findUniqueValueFromBranch(jumpByDelta(pos), length>>1, haveUniqueValue, uniqueValue)) {
|
||||
return NULL;
|
||||
}
|
||||
length=length-(length>>1);
|
||||
pos=skipDelta(pos);
|
||||
}
|
||||
do {
|
||||
++pos; // ignore a comparison byte
|
||||
// handle its value
|
||||
int32_t node=*pos++;
|
||||
UBool isFinal=(UBool)(node&kValueIsFinal);
|
||||
int32_t value=readValue(pos, node>>1);
|
||||
pos=skipValue(pos, node);
|
||||
if(isFinal) {
|
||||
if(haveUniqueValue) {
|
||||
if(value!=uniqueValue) {
|
||||
return NULL;
|
||||
}
|
||||
} else {
|
||||
uniqueValue=value;
|
||||
haveUniqueValue=TRUE;
|
||||
}
|
||||
} else {
|
||||
if(!findUniqueValue(pos+value, haveUniqueValue, uniqueValue)) {
|
||||
return NULL;
|
||||
}
|
||||
haveUniqueValue=TRUE;
|
||||
}
|
||||
} while(--length>1);
|
||||
return pos+1; // ignore the last comparison byte
|
||||
}
|
||||
|
||||
// Walks the trie from pos and determines whether every reachable value is
// the same. haveUniqueValue says whether uniqueValue already holds a value
// to compare against. Returns TRUE when a final value is reached without
// having seen a different value; FALSE on the first difference.
UBool
BytesTrie::findUniqueValue(const uint8_t *pos, UBool haveUniqueValue, int32_t &uniqueValue) {
    for(;;) {
        int32_t lead=*pos++;
        if(lead>=kMinValueLead) {
            // Value node.
            const int32_t nodeValue=readValue(pos, lead>>1);
            if(!haveUniqueValue) {
                uniqueValue=nodeValue;
                haveUniqueValue=TRUE;
            } else if(nodeValue!=uniqueValue) {
                return FALSE;
            }
            if((UBool)(lead&kValueIsFinal)) {
                return TRUE;
            }
            pos=skipValue(pos, lead);
        } else if(lead>=kMinLinearMatch) {
            // Linear-match node: step over its match bytes.
            pos+=lead-kMinLinearMatch+1;
        } else {
            // Branch node; a zero lead means the length is in the next byte.
            if(lead==0) {
                lead=*pos++;
            }
            pos=findUniqueValueFromBranch(pos, lead+1, haveUniqueValue, uniqueValue);
            if(pos==NULL) {
                return FALSE;
            }
            haveUniqueValue=TRUE;
        }
    }
}
|
||||
|
||||
// Appends to `out` every byte that can continue the match from the current
// state and returns how many bytes were appended (0 when the trie is stopped
// or sits on a final value).
int32_t
BytesTrie::getNextBytes(ByteSink &out) const {
    const uint8_t *cursor=pos_;
    if(cursor==NULL) {
        return 0;
    }
    if(remainingMatchLength_>=0) {
        // Next byte of a pending linear-match node.
        append(out, *cursor);
        return 1;
    }
    int32_t lead=*cursor++;
    if(lead>=kMinValueLead) {
        if(lead&kValueIsFinal) {
            return 0;
        }
        // Step over the intermediate value to reach the real node.
        cursor=skipValue(cursor, lead);
        lead=*cursor++;
        U_ASSERT(lead<kMinValueLead);
    }
    if(lead>=kMinLinearMatch) {
        // First byte of the linear-match node.
        append(out, *cursor);
        return 1;
    }
    // Branch node; a zero lead means the length is in the next byte.
    if(lead==0) {
        lead=*cursor++;
    }
    ++lead;
    getNextBranchBytes(cursor, lead, out);
    return lead;
}
|
||||
|
||||
void
|
||||
BytesTrie::getNextBranchBytes(const uint8_t *pos, int32_t length, ByteSink &out) {
|
||||
while(length>kMaxBranchLinearSubNodeLength) {
|
||||
++pos; // ignore the comparison byte
|
||||
getNextBranchBytes(jumpByDelta(pos), length>>1, out);
|
||||
length=length-(length>>1);
|
||||
pos=skipDelta(pos);
|
||||
}
|
||||
do {
|
||||
append(out, *pos++);
|
||||
pos=skipValue(pos);
|
||||
} while(--length>1);
|
||||
append(out, *pos);
|
||||
}
|
||||
|
||||
void
|
||||
BytesTrie::append(ByteSink &out, int c) {
|
||||
char ch=(char)c;
|
||||
out.Append(&ch, 1);
|
||||
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
@ -0,0 +1,501 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2010-2012, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
* file name: bytestriebuilder.cpp
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2010sep25
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/bytestrie.h"
|
||||
#include "unicode/bytestriebuilder.h"
|
||||
#include "unicode/stringpiece.h"
|
||||
#include "charstr.h"
|
||||
#include "cmemory.h"
|
||||
#include "uhash.h"
|
||||
#include "uarrsort.h"
|
||||
#include "uassert.h"
|
||||
#include "ustr_imp.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/*
|
||||
* Note: This builder implementation stores (bytes, value) pairs with full copies
|
||||
* of the byte sequences, until the BytesTrie is built.
|
||||
* It might(!) take less memory if we collected the data in a temporary, dynamic trie.
|
||||
*/
|
||||
|
||||
class BytesTrieElement : public UMemory {
|
||||
public:
|
||||
// Use compiler's default constructor, initializes nothing.
|
||||
|
||||
void setTo(const StringPiece &s, int32_t val, CharString &strings, UErrorCode &errorCode);
|
||||
|
||||
StringPiece getString(const CharString &strings) const {
|
||||
int32_t offset=stringOffset;
|
||||
int32_t length;
|
||||
if(offset>=0) {
|
||||
length=(uint8_t)strings[offset++];
|
||||
} else {
|
||||
offset=~offset;
|
||||
length=((int32_t)(uint8_t)strings[offset]<<8)|(uint8_t)strings[offset+1];
|
||||
offset+=2;
|
||||
}
|
||||
return StringPiece(strings.data()+offset, length);
|
||||
}
|
||||
int32_t getStringLength(const CharString &strings) const {
|
||||
int32_t offset=stringOffset;
|
||||
if(offset>=0) {
|
||||
return (uint8_t)strings[offset];
|
||||
} else {
|
||||
offset=~offset;
|
||||
return ((int32_t)(uint8_t)strings[offset]<<8)|(uint8_t)strings[offset+1];
|
||||
}
|
||||
}
|
||||
|
||||
char charAt(int32_t index, const CharString &strings) const { return data(strings)[index]; }
|
||||
|
||||
int32_t getValue() const { return value; }
|
||||
|
||||
int32_t compareStringTo(const BytesTrieElement &o, const CharString &strings) const;
|
||||
|
||||
private:
|
||||
const char *data(const CharString &strings) const {
|
||||
int32_t offset=stringOffset;
|
||||
if(offset>=0) {
|
||||
++offset;
|
||||
} else {
|
||||
offset=~offset+2;
|
||||
}
|
||||
return strings.data()+offset;
|
||||
}
|
||||
|
||||
// If the stringOffset is non-negative, then the first strings byte contains
|
||||
// the string length.
|
||||
// If the stringOffset is negative, then the first two strings bytes contain
|
||||
// the string length (big-endian), and the offset needs to be bit-inverted.
|
||||
// (Compared with a stringLength field here, this saves 3 bytes per string for most strings.)
|
||||
int32_t stringOffset;
|
||||
int32_t value;
|
||||
};
|
||||
|
||||
void
|
||||
BytesTrieElement::setTo(const StringPiece &s, int32_t val,
|
||||
CharString &strings, UErrorCode &errorCode) {
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return;
|
||||
}
|
||||
int32_t length=s.length();
|
||||
if(length>0xffff) {
|
||||
// Too long: We store the length in 1 or 2 bytes.
|
||||
errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
return;
|
||||
}
|
||||
int32_t offset=strings.length();
|
||||
if(length>0xff) {
|
||||
offset=~offset;
|
||||
strings.append((char)(length>>8), errorCode);
|
||||
}
|
||||
strings.append((char)length, errorCode);
|
||||
stringOffset=offset;
|
||||
value=val;
|
||||
strings.append(s, errorCode);
|
||||
}
|
||||
|
||||
// Three-way comparison of this element's bytes with another's:
// memcmp order over the common prefix, then the shorter string first.
// TODO: add StringPiece::compare(), see ticket #8187
int32_t
BytesTrieElement::compareStringTo(const BytesTrieElement &other, const CharString &strings) const {
    const StringPiece mine=getString(strings);
    const StringPiece theirs=other.getString(strings);
    const int32_t lengthDelta=mine.length()-theirs.length();
    int32_t sharedLength=theirs.length();
    if(lengthDelta<=0) {
        sharedLength=mine.length();
    }
    const int32_t prefixOrder=uprv_memcmp(mine.data(), theirs.data(), sharedLength);
    if(prefixOrder!=0) {
        return prefixOrder;
    }
    return lengthDelta;
}
|
||||
|
||||
// Creates an empty builder. Nothing is allocated if errorCode already
// indicates failure; otherwise the shared CharString is allocated and
// U_MEMORY_ALLOCATION_ERROR is set if that allocation yields NULL.
BytesTrieBuilder::BytesTrieBuilder(UErrorCode &errorCode)
        : strings(NULL), elements(NULL), elementsCapacity(0), elementsLength(0),
          bytes(NULL), bytesCapacity(0), bytesLength(0) {
    if(U_SUCCESS(errorCode)) {
        strings=new CharString();
        if(strings==NULL) {
            errorCode=U_MEMORY_ALLOCATION_ERROR;
        }
    }
}
|
||||
|
||||
// Releases the string storage, the element array and the serialized bytes.
BytesTrieBuilder::~BytesTrieBuilder()
{
    delete strings;
    delete[] elements;
    uprv_free(bytes);
}
|
||||
|
||||
// Adds one (byte string, value) pair.
// Fails with U_NO_WRITE_PERMISSION once the trie has been built, and with
// U_MEMORY_ALLOCATION_ERROR if the element array cannot be grown.
// Returns *this for chaining.
BytesTrieBuilder &
BytesTrieBuilder::add(const StringPiece &s, int32_t value, UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) {
        return *this;
    }
    if(bytesLength>0) {
        // Cannot add elements after building.
        errorCode=U_NO_WRITE_PERMISSION;
        return *this;
    }
    if(elementsLength==elementsCapacity) {
        // Grow: start at 1024 elements, then quadruple.
        const int32_t grownCapacity= elementsCapacity==0 ? 1024 : 4*elementsCapacity;
        BytesTrieElement *grown=new BytesTrieElement[grownCapacity];
        if(grown==NULL) {
            errorCode=U_MEMORY_ALLOCATION_ERROR;
            return *this;  // error instead of dereferencing null
        }
        if(elementsLength>0) {
            uprv_memcpy(grown, elements, elementsLength*sizeof(BytesTrieElement));
        }
        delete[] elements;
        elements=grown;
        elementsCapacity=grownCapacity;
    }
    BytesTrieElement &slot=elements[elementsLength++];
    slot.setTo(s, value, *strings, errorCode);
    return *this;
}
|
||||
|
||||
U_CDECL_BEGIN
|
||||
|
||||
static int32_t U_CALLCONV
|
||||
compareElementStrings(const void *context, const void *left, const void *right) {
|
||||
const CharString *strings=static_cast<const CharString *>(context);
|
||||
const BytesTrieElement *leftElement=static_cast<const BytesTrieElement *>(left);
|
||||
const BytesTrieElement *rightElement=static_cast<const BytesTrieElement *>(right);
|
||||
return leftElement->compareStringTo(*rightElement, *strings);
|
||||
}
|
||||
|
||||
U_CDECL_END
|
||||
|
||||
// Serializes the collected elements and wraps the result in a new BytesTrie.
// On success the trie takes over the byte array, so the builder drops its
// pointer. Returns NULL on failure.
BytesTrie *
BytesTrieBuilder::build(UStringTrieBuildOption buildOption, UErrorCode &errorCode) {
    buildBytes(buildOption, errorCode);
    if(U_FAILURE(errorCode)) {
        return NULL;
    }
    // The serialized trie occupies the last bytesLength bytes of the array.
    BytesTrie *trie=new BytesTrie(bytes, bytes+(bytesCapacity-bytesLength));
    if(trie==NULL) {
        errorCode=U_MEMORY_ALLOCATION_ERROR;
        return trie;
    }
    bytes=NULL;  // The new trie now owns the array.
    bytesCapacity=0;
    return trie;
}
|
||||
|
||||
// Serializes the collected elements and returns a view of the serialized
// bytes, which stay in the builder's array. On failure an empty
// (default-constructed) StringPiece is returned.
StringPiece
BytesTrieBuilder::buildStringPiece(UStringTrieBuildOption buildOption, UErrorCode &errorCode) {
    buildBytes(buildOption, errorCode);
    StringPiece serialized;
    if(U_FAILURE(errorCode)) {
        return serialized;
    }
    // The serialized trie occupies the last bytesLength bytes of the array.
    serialized.set(bytes+(bytesCapacity-bytesLength), bytesLength);
    return serialized;
}
|
||||
|
||||
void
|
||||
BytesTrieBuilder::buildBytes(UStringTrieBuildOption buildOption, UErrorCode &errorCode) {
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return;
|
||||
}
|
||||
if(bytes!=NULL && bytesLength>0) {
|
||||
// Already built.
|
||||
return;
|
||||
}
|
||||
if(bytesLength==0) {
|
||||
if(elementsLength==0) {
|
||||
errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
return;
|
||||
}
|
||||
uprv_sortArray(elements, elementsLength, (int32_t)sizeof(BytesTrieElement),
|
||||
compareElementStrings, strings,
|
||||
FALSE, // need not be a stable sort
|
||||
&errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return;
|
||||
}
|
||||
// Duplicate strings are not allowed.
|
||||
StringPiece prev=elements[0].getString(*strings);
|
||||
for(int32_t i=1; i<elementsLength; ++i) {
|
||||
StringPiece current=elements[i].getString(*strings);
|
||||
if(prev==current) {
|
||||
errorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return;
|
||||
}
|
||||
prev=current;
|
||||
}
|
||||
}
|
||||
// Create and byte-serialize the trie for the elements.
|
||||
bytesLength=0;
|
||||
int32_t capacity=strings->length();
|
||||
if(capacity<1024) {
|
||||
capacity=1024;
|
||||
}
|
||||
if(bytesCapacity<capacity) {
|
||||
uprv_free(bytes);
|
||||
bytes=static_cast<char *>(uprv_malloc(capacity));
|
||||
if(bytes==NULL) {
|
||||
errorCode=U_MEMORY_ALLOCATION_ERROR;
|
||||
bytesCapacity=0;
|
||||
return;
|
||||
}
|
||||
bytesCapacity=capacity;
|
||||
}
|
||||
StringTrieBuilder::build(buildOption, elementsLength, errorCode);
|
||||
if(bytes==NULL) {
|
||||
errorCode=U_MEMORY_ALLOCATION_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
// Discards all added strings and any built trie state so the builder can be
// reused. The bytes buffer itself is kept; only its used length is reset.
// Returns *this.
BytesTrieBuilder &
BytesTrieBuilder::clear() {
    bytesLength=0;
    elementsLength=0;
    strings->clear();
    return *this;
}
|
||||
|
||||
// Returns the length of the string belonging to element i.
int32_t
BytesTrieBuilder::getElementStringLength(int32_t i) const {
    const BytesTrieElement &element=elements[i];
    return element.getStringLength(*strings);
}
|
||||
|
||||
// Returns the byte at byteIndex in the string of element i.
// The value goes through uint8_t so that it is returned as 0..0xff.
UChar
BytesTrieBuilder::getElementUnit(int32_t i, int32_t byteIndex) const {
    const BytesTrieElement &element=elements[i];
    return (uint8_t)element.charAt(byteIndex, *strings);
}
|
||||
|
||||
// Returns the value stored with element i.
int32_t
BytesTrieBuilder::getElementValue(int32_t i) const {
    const BytesTrieElement &element=elements[i];
    return element.getValue();
}
|
||||
|
||||
// Starting just after byteIndex, finds the first byte position at which the
// strings of elements[first] and elements[last] differ, or the length of the
// first element's string if that comes sooner. Returns that position.
int32_t
BytesTrieBuilder::getLimitOfLinearMatch(int32_t first, int32_t last, int32_t byteIndex) const {
    const BytesTrieElement &head=elements[first];
    const BytesTrieElement &tail=elements[last];
    const int32_t limit=head.getStringLength(*strings);
    for(++byteIndex; byteIndex<limit; ++byteIndex) {
        if(head.charAt(byteIndex, *strings)!=tail.charAt(byteIndex, *strings)) {
            break;
        }
    }
    return byteIndex;
}
|
||||
|
||||
// Counts the runs of equal bytes at byteIndex among elements[start..limit-1].
// At least one element is always examined (the loop body runs before the
// bounds test).
int32_t
BytesTrieBuilder::countElementUnits(int32_t start, int32_t limit, int32_t byteIndex) const {
    int32_t distinct=0;  // Number of different bytes at byteIndex.
    int32_t index=start;
    do {
        const char unit=elements[index].charAt(byteIndex, *strings);
        // Skip over the rest of the run that shares this byte.
        for(++index; index<limit && unit==elements[index].charAt(byteIndex, *strings); ++index) {}
        ++distinct;
    } while(index<limit);
    return distinct;
}
|
||||
|
||||
// Starting at element i, skips over `count` runs of elements that share the
// same byte at byteIndex and returns the index of the element after them.
// No upper bound is checked here; the loop relies on a following element
// with a different byte existing.
int32_t
BytesTrieBuilder::skipElementsBySomeUnits(int32_t i, int32_t byteIndex, int32_t count) const {
    for(;;) {
        const char unit=elements[i++].charAt(byteIndex, *strings);
        while(unit==elements[i].charAt(byteIndex, *strings)) {
            ++i;
        }
        if(--count<=0) {
            break;
        }
    }
    return i;
}
|
||||
|
||||
// Starting at element i, advances past every element whose byte at byteIndex
// equals `byte` and returns the index of the first one that differs.
// No upper bound is checked here.
int32_t
BytesTrieBuilder::indexOfElementWithNextUnit(int32_t i, int32_t byteIndex, UChar byte) const {
    const char wanted=(char)byte;
    for(; wanted==elements[i].charAt(byteIndex, *strings); ++i) {}
    return i;
}
|
||||
|
||||
// Creates a linear-match node for `len` bytes starting at `bytes`, followed by
// nextNode. The byte pointer is stored, not copied, and the bytes are mixed
// into the node's hash code.
BytesTrieBuilder::BTLinearMatchNode::BTLinearMatchNode(const char *bytes, int32_t len, Node *nextNode)
        : LinearMatchNode(len, nextNode), s(bytes) {
    hash=37*hash+ustr_hashCharsN(bytes, len);
}
|
||||
|
||||
// Two nodes are equal when they are the same object, or when the base-class
// comparison succeeds and both nodes match the same `length` bytes.
UBool
BytesTrieBuilder::BTLinearMatchNode::operator==(const Node &other) const {
    if(this==&other) {
        return TRUE;
    }
    if(!LinearMatchNode::operator==(other)) {
        return FALSE;
    }
    // Cast only after the base-class comparison has succeeded.
    const BTLinearMatchNode &that=(const BTLinearMatchNode &)other;
    return uprv_memcmp(s, that.s, length)==0;
}
|
||||
|
||||
void
|
||||
BytesTrieBuilder::BTLinearMatchNode::write(StringTrieBuilder &builder) {
|
||||
BytesTrieBuilder &b=(BytesTrieBuilder &)builder;
|
||||
next->write(builder);
|
||||
b.write(s, length);
|
||||
offset=b.write(b.getMinLinearMatch()+length-1);
|
||||
}
|
||||
|
||||
// Allocates a BTLinearMatchNode for `length` bytes of element i's string,
// starting at byteIndex, followed by nextNode.
// The node points into the builder's string storage rather than copying it.
StringTrieBuilder::Node *
BytesTrieBuilder::createLinearMatchNode(int32_t i, int32_t byteIndex, int32_t length,
                                        Node *nextNode) const {
    const char *matchStart=elements[i].getString(*strings).data()+byteIndex;
    return new BTLinearMatchNode(matchStart, length, nextNode);
}
|
||||
|
||||
// Makes sure the bytes buffer can hold `length` bytes, growing it by repeated
// doubling if needed. The already written bytes stay at the END of the
// buffer, because the trie is serialized back to front.
// Returns FALSE if the buffer is missing (an earlier allocation failed) or
// cannot be grown; in the latter case the buffer is freed and bytes is NULL.
UBool
BytesTrieBuilder::ensureCapacity(int32_t length) {
    if(bytes==NULL) {
        return FALSE;  // previous memory allocation had failed
    }
    if(length<=bytesCapacity) {
        return TRUE;
    }
    int32_t grownCapacity=bytesCapacity;
    do {
        grownCapacity*=2;
    } while(grownCapacity<=length);
    char *grownBytes=static_cast<char *>(uprv_malloc(grownCapacity));
    if(grownBytes==NULL) {
        // unable to allocate memory
        uprv_free(bytes);
        bytes=NULL;
        bytesCapacity=0;
        return FALSE;
    }
    // Copy the used tail of the old buffer to the tail of the new one.
    uprv_memcpy(grownBytes+(grownCapacity-bytesLength),
                bytes+(bytesCapacity-bytesLength), bytesLength);
    uprv_free(bytes);
    bytes=grownBytes;
    bytesCapacity=grownCapacity;
    return TRUE;
}
|
||||
|
||||
// Prepends one byte to the serialized trie.
// Returns the resulting bytesLength; it is unchanged if the buffer could not
// be grown.
int32_t
BytesTrieBuilder::write(int32_t byte) {
    const int32_t grownLength=bytesLength+1;
    if(!ensureCapacity(grownLength)) {
        return bytesLength;
    }
    bytesLength=grownLength;
    bytes[bytesCapacity-bytesLength]=(char)byte;
    return bytesLength;
}
|
||||
|
||||
// Prepends `length` bytes from b to the serialized trie, keeping their order.
// Returns the resulting bytesLength; it is unchanged if the buffer could not
// be grown.
int32_t
BytesTrieBuilder::write(const char *b, int32_t length) {
    const int32_t grownLength=bytesLength+length;
    if(!ensureCapacity(grownLength)) {
        return bytesLength;
    }
    bytesLength=grownLength;
    uprv_memcpy(bytes+(bytesCapacity-bytesLength), b, length);
    return bytesLength;
}
|
||||
|
||||
// Writes `length` bytes of element i's string, starting at byteIndex.
// Returns what write() returns.
int32_t
BytesTrieBuilder::writeElementUnits(int32_t i, int32_t byteIndex, int32_t length) {
    const char *unitsStart=elements[i].getString(*strings).data()+byteIndex;
    return write(unitsStart, length);
}
|
||||
|
||||
// Writes value i in the variable-length value encoding, with isFinal stored
// in the low bit of the lead byte.
// Values 0..kMaxOneByteValue take one byte; negative values and values above
// 0xffffff take five bytes; everything in between takes two, three or four.
// Returns what write() returns.
int32_t
BytesTrieBuilder::writeValueAndFinal(int32_t i, UBool isFinal) {
    if(0<=i && i<=BytesTrie::kMaxOneByteValue) {
        return write(((BytesTrie::kMinOneByteValueLead+i)<<1)|isFinal);
    }
    char encoded[5];
    int32_t count;
    if(i<0 || i>0xffffff) {
        encoded[0]=(char)BytesTrie::kFiveByteValueLead;
        encoded[1]=(char)((uint32_t)i>>24);
        encoded[2]=(char)((uint32_t)i>>16);
        encoded[3]=(char)((uint32_t)i>>8);
        encoded[4]=(char)i;
        count=5;
    } else if(i<=BytesTrie::kMaxTwoByteValue) {
        encoded[0]=(char)(BytesTrie::kMinTwoByteValueLead+(i>>8));
        encoded[1]=(char)i;
        count=2;
    } else if(i<=BytesTrie::kMaxThreeByteValue) {
        encoded[0]=(char)(BytesTrie::kMinThreeByteValueLead+(i>>16));
        encoded[1]=(char)(i>>8);
        encoded[2]=(char)i;
        count=3;
    } else {
        encoded[0]=(char)BytesTrie::kFourByteValueLead;
        encoded[1]=(char)(i>>16);
        encoded[2]=(char)(i>>8);
        encoded[3]=(char)i;
        count=4;
    }
    // Shift the lead byte to make room for the is-final flag in bit 0.
    encoded[0]=(char)((encoded[0]<<1)|isFinal);
    return write(encoded, count);
}
|
||||
|
||||
// Writes the node byte and, if hasValue is set, a non-final value in front
// of it. Returns the offset reported by the last write performed.
int32_t
BytesTrieBuilder::writeValueAndType(UBool hasValue, int32_t value, int32_t node) {
    const int32_t nodeOffset=write(node);
    if(!hasValue) {
        return nodeOffset;
    }
    return writeValueAndFinal(value, FALSE);
}
|
||||
|
||||
// Writes the jump delta from the current write position to jumpTarget, i.e.
// bytesLength-jumpTarget, in the variable-length delta encoding:
//   up to kMaxOneByteDelta    -> 1 byte
//   up to kMaxTwoByteDelta    -> lead byte (carrying bits 8 and up) + 1 byte
//   up to kMaxThreeByteDelta  -> lead byte (carrying bits 16 and up) + 2 bytes
//   up to 0xffffff            -> kFourByteDeltaLead + 3 bytes
//   anything larger           -> kFiveByteDeltaLead + 4 bytes
// The bytes after the lead byte are written most significant first.
// Returns what write() returns.
//
// Fix: each trailing byte is now appended at its own index. The previous code
// stored every intermediate byte into intBytes[1], so for four- and five-byte
// deltas the earlier bytes were overwritten and the remaining slots were
// written out uninitialized.
int32_t
BytesTrieBuilder::writeDeltaTo(int32_t jumpTarget) {
    int32_t i=bytesLength-jumpTarget;
    U_ASSERT(i>=0);
    if(i<=BytesTrie::kMaxOneByteDelta) {
        return write(i);
    }
    char intBytes[5];
    int32_t length=1;  // The lead byte is always at index 0.
    if(i<=BytesTrie::kMaxTwoByteDelta) {
        intBytes[0]=(char)(BytesTrie::kMinTwoByteDeltaLead+(i>>8));
    } else {
        if(i<=BytesTrie::kMaxThreeByteDelta) {
            intBytes[0]=(char)(BytesTrie::kMinThreeByteDeltaLead+(i>>16));
        } else {
            if(i<=0xffffff) {
                intBytes[0]=(char)BytesTrie::kFourByteDeltaLead;
            } else {
                intBytes[0]=(char)BytesTrie::kFiveByteDeltaLead;
                intBytes[length++]=(char)(i>>24);
            }
            intBytes[length++]=(char)(i>>16);
        }
        intBytes[length++]=(char)(i>>8);
    }
    intBytes[length++]=(char)i;
    return write(intBytes, length);
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
@ -0,0 +1,210 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2010-2012, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
* file name: bytestrieiterator.cpp
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2010nov03
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/bytestrie.h"
|
||||
#include "unicode/stringpiece.h"
|
||||
#include "charstr.h"
|
||||
#include "uvectr32.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
// Creates an iterator over the serialized trie at trieBytes, starting at its
// root. maxStringLength is stored as maxLength_; the code treats values <=0
// as "no limit".
// Does not allocate anything if errorCode already indicates a failure; sets
// U_MEMORY_ALLOCATION_ERROR if either helper object cannot be allocated.
BytesTrie::Iterator::Iterator(const void *trieBytes, int32_t maxStringLength,
                              UErrorCode &errorCode)
        : bytes_(static_cast<const uint8_t *>(trieBytes)),
          pos_(bytes_), initialPos_(bytes_),
          remainingMatchLength_(-1), initialRemainingMatchLength_(-1),
          str_(NULL), maxLength_(maxStringLength), value_(0), stack_(NULL) {
    if(U_FAILURE(errorCode)) {
        return;
    }
    // str_ and stack_ are pointers so that it's easy to turn bytestrie.h into
    // a public API header for which we would want it to depend only on
    // other public headers.
    // Unlike BytesTrie itself, its Iterator performs memory allocations anyway
    // via the CharString and UVector32 implementations, so this additional
    // cost is minimal.
    str_=new CharString();
    stack_=new UVector32(errorCode);
    if(U_FAILURE(errorCode)) {
        return;
    }
    if(str_==NULL || stack_==NULL) {
        errorCode=U_MEMORY_ALLOCATION_ERROR;
    }
}
|
||||
|
||||
// Creates an iterator that starts from the current state of `trie`.
// If the trie is in the middle of a linear-match node, the remaining bytes of
// that node (capped at maxLength_ when that is positive) are appended to
// str_ right away.
// Sets U_MEMORY_ALLOCATION_ERROR if either helper object cannot be allocated.
BytesTrie::Iterator::Iterator(const BytesTrie &trie, int32_t maxStringLength,
                              UErrorCode &errorCode)
        : bytes_(trie.bytes_), pos_(trie.pos_), initialPos_(trie.pos_),
          remainingMatchLength_(trie.remainingMatchLength_),
          initialRemainingMatchLength_(trie.remainingMatchLength_),
          str_(NULL), maxLength_(maxStringLength), value_(0), stack_(NULL) {
    if(U_FAILURE(errorCode)) {
        return;
    }
    str_=new CharString();
    stack_=new UVector32(errorCode);
    if(U_FAILURE(errorCode)) {
        return;
    }
    if(str_==NULL || stack_==NULL) {
        errorCode=U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    if(remainingMatchLength_<0) {
        return;  // Not inside a linear-match node.
    }
    // remainingMatchLength_ holds the actual remaining match length minus 1.
    int32_t pending=remainingMatchLength_+1;
    if(maxLength_>0 && pending>maxLength_) {
        pending=maxLength_;  // This will leave remainingMatchLength>=0 as a signal.
    }
    // Pending linear-match node, append remaining bytes to str_.
    str_->append(reinterpret_cast<const char *>(pos_), pending, errorCode);
    pos_+=pending;
    remainingMatchLength_-=pending;
}
|
||||
|
||||
// Releases the helper objects allocated by the constructors.
// Either pointer may still be NULL if construction failed.
BytesTrie::Iterator::~Iterator() {
    delete stack_;
    delete str_;
}
|
||||
|
||||
// Rewinds the iterator to the state it had right after construction:
// the stack is emptied, str_ is cut back to the initially pending
// linear-match bytes (capped at maxLength_ when that is positive), and the
// position and remaining match length are recomputed from their initial values.
// Returns *this.
BytesTrie::Iterator &
BytesTrie::Iterator::reset() {
    stack_->setSize(0);
    int32_t pending=initialRemainingMatchLength_+1;  // Remaining match length.
    if(maxLength_>0 && pending>maxLength_) {
        pending=maxLength_;
    }
    str_->truncate(pending);
    pos_=initialPos_+pending;
    remainingMatchLength_=initialRemainingMatchLength_-pending;
    return *this;
}
|
||||
|
||||
// Returns TRUE while there is a current position to continue from or saved
// branch state left on the stack.
UBool
BytesTrie::Iterator::hasNext() const {
    if(pos_!=NULL) {
        return TRUE;
    }
    return !stack_->isEmpty();
}
|
||||
|
||||
// Advances to the next (byte sequence, value) pair.
// Returns FALSE if errorCode already indicates a failure or if nothing is
// left to visit; otherwise returns TRUE with sp_ and value_ describing the
// result. When the string had to be cut off at maxLength_, value_ is -1
// (see truncateAndStop()).
UBool
BytesTrie::Iterator::next(UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) {
        return FALSE;
    }
    const uint8_t *cursor=pos_;
    if(cursor==NULL) {
        if(stack_->isEmpty()) {
            return FALSE;
        }
        // Pop the state off the stack and continue with the next outbound edge of
        // the branch node.
        const int32_t depth=stack_->size();
        const int32_t packed=stack_->elementAti(depth-1);
        cursor=bytes_+stack_->elementAti(depth-2);
        stack_->setSize(depth-2);
        // Low 16 bits: string length to restore; high 16 bits: remaining edges.
        str_->truncate(packed&0xffff);
        const int32_t remainingEdges=(int32_t)((uint32_t)packed>>16);
        if(remainingEdges>1) {
            cursor=branchNext(cursor, remainingEdges, errorCode);
            if(cursor==NULL) {
                return TRUE;  // Reached a final value.
            }
        } else {
            str_->append((char)*cursor++, errorCode);
        }
    }
    if(remainingMatchLength_>=0) {
        // We only get here if we started in a pending linear-match node
        // with more than maxLength remaining bytes.
        return truncateAndStop();
    }
    for(;;) {
        int32_t node=*cursor++;
        if(node>=kMinValueLead) {
            // Deliver value for the byte sequence so far.
            const UBool isFinal=(UBool)(node&kValueIsFinal);
            value_=readValue(cursor, node>>1);
            if(isFinal || (maxLength_>0 && str_->length()==maxLength_)) {
                pos_=NULL;
            } else {
                pos_=skipValue(cursor, node);
            }
            sp_.set(str_->data(), str_->length());
            return TRUE;
        }
        if(maxLength_>0 && str_->length()==maxLength_) {
            return truncateAndStop();
        }
        if(node>=kMinLinearMatch) {
            // Linear-match node, append length bytes to str_.
            const int32_t matchLength=node-kMinLinearMatch+1;
            if(maxLength_>0 && str_->length()+matchLength>maxLength_) {
                str_->append(reinterpret_cast<const char *>(cursor),
                             maxLength_-str_->length(), errorCode);
                return truncateAndStop();
            }
            str_->append(reinterpret_cast<const char *>(cursor), matchLength, errorCode);
            cursor+=matchLength;
            continue;
        }
        // Branch node; a lead byte of 0 means the edge count is in the next byte.
        if(node==0) {
            node=*cursor++;
        }
        cursor=branchNext(cursor, node+1, errorCode);
        if(cursor==NULL) {
            return TRUE;  // Reached a final value.
        }
    }
}
|
||||
|
||||
// Ends the current path: exposes the string collected so far through sp_,
// reports -1 as its value, and clears pos_. Always returns TRUE.
UBool
BytesTrie::Iterator::truncateAndStop() {
    value_=-1;  // no real value for str
    sp_.set(str_->data(), str_->length());
    pos_=NULL;
    return TRUE;
}
|
||||
|
||||
// Branch node, needs to take the first outbound edge and push state for the rest.
|
||||
const uint8_t *
|
||||
BytesTrie::Iterator::branchNext(const uint8_t *pos, int32_t length, UErrorCode &errorCode) {
|
||||
while(length>kMaxBranchLinearSubNodeLength) {
|
||||
++pos; // ignore the comparison byte
|
||||
// Push state for the greater-or-equal edge.
|
||||
stack_->addElement((int32_t)(skipDelta(pos)-bytes_), errorCode);
|
||||
stack_->addElement(((length-(length>>1))<<16)|str_->length(), errorCode);
|
||||
// Follow the less-than edge.
|
||||
length>>=1;
|
||||
pos=jumpByDelta(pos);
|
||||
}
|
||||
// List of key-value pairs where values are either final values or jump deltas.
|
||||
// Read the first (key, value) pair.
|
||||
uint8_t trieByte=*pos++;
|
||||
int32_t node=*pos++;
|
||||
UBool isFinal=(UBool)(node&kValueIsFinal);
|
||||
int32_t value=readValue(pos, node>>1);
|
||||
pos=skipValue(pos, node);
|
||||
stack_->addElement((int32_t)(pos-bytes_), errorCode);
|
||||
stack_->addElement(((length-1)<<16)|str_->length(), errorCode);
|
||||
str_->append((char)trieByte, errorCode);
|
||||
if(isFinal) {
|
||||
pos_=NULL;
|
||||
sp_.set(str_->data(), str_->length());
|
||||
value_=value;
|
||||
return NULL;
|
||||
} else {
|
||||
return pos+value;
|
||||
}
|
||||
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
584
UnicodeConverter/icubuilds-mac/icu/icu/common/caniter.cpp
Normal file
584
UnicodeConverter/icubuilds-mac/icu/icu/common/caniter.cpp
Normal file
@ -0,0 +1,584 @@
|
||||
/*
|
||||
*****************************************************************************
|
||||
* Copyright (C) 1996-2015, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
*****************************************************************************
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_NORMALIZATION
|
||||
|
||||
#include "unicode/caniter.h"
|
||||
#include "unicode/normalizer2.h"
|
||||
#include "unicode/uchar.h"
|
||||
#include "unicode/uniset.h"
|
||||
#include "unicode/usetiter.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "cmemory.h"
|
||||
#include "hash.h"
|
||||
#include "normalizer2impl.h"
|
||||
|
||||
/**
|
||||
* This class allows one to iterate through all the strings that are canonically equivalent to a given
|
||||
* string. For example, here are some sample results:
|
||||
Results for: {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}
|
||||
1: \u0041\u030A\u0064\u0307\u0327
|
||||
= {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}
|
||||
2: \u0041\u030A\u0064\u0327\u0307
|
||||
= {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE}
|
||||
3: \u0041\u030A\u1E0B\u0327
|
||||
= {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA}
|
||||
4: \u0041\u030A\u1E11\u0307
|
||||
= {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE}
|
||||
5: \u00C5\u0064\u0307\u0327
|
||||
= {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}
|
||||
6: \u00C5\u0064\u0327\u0307
|
||||
= {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE}
|
||||
7: \u00C5\u1E0B\u0327
|
||||
= {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA}
|
||||
8: \u00C5\u1E11\u0307
|
||||
= {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE}
|
||||
9: \u212B\u0064\u0307\u0327
|
||||
= {ANGSTROM SIGN}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}
|
||||
10: \u212B\u0064\u0327\u0307
|
||||
= {ANGSTROM SIGN}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE}
|
||||
11: \u212B\u1E0B\u0327
|
||||
= {ANGSTROM SIGN}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA}
|
||||
12: \u212B\u1E11\u0307
|
||||
= {ANGSTROM SIGN}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE}
|
||||
*<br>Note: the code is intended for use with small strings, and is not suitable for larger ones,
|
||||
* since it has not been optimized for that situation.
|
||||
*@author M. Davis
|
||||
*@draft
|
||||
*/
|
||||
|
||||
// public
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
// TODO: add boilerplate methods.
|
||||
|
||||
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CanonicalIterator)
|
||||
|
||||
/**
|
||||
*@param source string to get results for
|
||||
*/
|
||||
CanonicalIterator::CanonicalIterator(const UnicodeString &sourceStr, UErrorCode &status) :
|
||||
pieces(NULL),
|
||||
pieces_length(0),
|
||||
pieces_lengths(NULL),
|
||||
current(NULL),
|
||||
current_length(0),
|
||||
nfd(*Normalizer2::getNFDInstance(status)),
|
||||
nfcImpl(*Normalizer2Factory::getNFCImpl(status))
|
||||
{
|
||||
if(U_SUCCESS(status) && nfcImpl.ensureCanonIterData(status)) {
|
||||
setSource(sourceStr, status);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Destructor: releases the piece tables through cleanPieces().
 */
CanonicalIterator::~CanonicalIterator() {
    cleanPieces();
}
|
||||
|
||||
void CanonicalIterator::cleanPieces() {
|
||||
int32_t i = 0;
|
||||
if(pieces != NULL) {
|
||||
for(i = 0; i < pieces_length; i++) {
|
||||
if(pieces[i] != NULL) {
|
||||
delete[] pieces[i];
|
||||
}
|
||||
}
|
||||
uprv_free(pieces);
|
||||
pieces = NULL;
|
||||
pieces_length = 0;
|
||||
}
|
||||
if(pieces_lengths != NULL) {
|
||||
uprv_free(pieces_lengths);
|
||||
pieces_lengths = NULL;
|
||||
}
|
||||
if(current != NULL) {
|
||||
uprv_free(current);
|
||||
current = NULL;
|
||||
current_length = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
*@return gets the source: NOTE: it is the NFD form of source
|
||||
*/
|
||||
UnicodeString CanonicalIterator::getSource() {
|
||||
return source;
|
||||
}
|
||||
|
||||
/**
|
||||
* Resets the iterator so that one can start again from the beginning.
|
||||
*/
|
||||
void CanonicalIterator::reset() {
|
||||
done = FALSE;
|
||||
for (int i = 0; i < current_length; ++i) {
|
||||
current[i] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
*@return the next string that is canonically equivalent. The value null is returned when
|
||||
* the iteration is done.
|
||||
*/
|
||||
UnicodeString CanonicalIterator::next() {
|
||||
int32_t i = 0;
|
||||
|
||||
if (done) {
|
||||
buffer.setToBogus();
|
||||
return buffer;
|
||||
}
|
||||
|
||||
// delete old contents
|
||||
buffer.remove();
|
||||
|
||||
// construct return value
|
||||
|
||||
for (i = 0; i < pieces_length; ++i) {
|
||||
buffer.append(pieces[i][current[i]]);
|
||||
}
|
||||
//String result = buffer.toString(); // not needed
|
||||
|
||||
// find next value for next time
|
||||
|
||||
for (i = current_length - 1; ; --i) {
|
||||
if (i < 0) {
|
||||
done = TRUE;
|
||||
break;
|
||||
}
|
||||
current[i]++;
|
||||
if (current[i] < pieces_lengths[i]) break; // got sequence
|
||||
current[i] = 0;
|
||||
}
|
||||
return buffer;
|
||||
}
|
||||
|
||||
/**
|
||||
*@param set the source string to iterate against. This allows the same iterator to be used
|
||||
* while changing the source string, saving object creation.
|
||||
*/
|
||||
void CanonicalIterator::setSource(const UnicodeString &newSource, UErrorCode &status) {
|
||||
int32_t list_length = 0;
|
||||
UChar32 cp = 0;
|
||||
int32_t start = 0;
|
||||
int32_t i = 0;
|
||||
UnicodeString *list = NULL;
|
||||
|
||||
nfd.normalize(newSource, source, status);
|
||||
if(U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
done = FALSE;
|
||||
|
||||
cleanPieces();
|
||||
|
||||
// catch degenerate case
|
||||
if (newSource.length() == 0) {
|
||||
pieces = (UnicodeString **)uprv_malloc(sizeof(UnicodeString *));
|
||||
pieces_lengths = (int32_t*)uprv_malloc(1 * sizeof(int32_t));
|
||||
pieces_length = 1;
|
||||
current = (int32_t*)uprv_malloc(1 * sizeof(int32_t));
|
||||
current_length = 1;
|
||||
if (pieces == NULL || pieces_lengths == NULL || current == NULL) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
goto CleanPartialInitialization;
|
||||
}
|
||||
current[0] = 0;
|
||||
pieces[0] = new UnicodeString[1];
|
||||
pieces_lengths[0] = 1;
|
||||
if (pieces[0] == 0) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
goto CleanPartialInitialization;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
list = new UnicodeString[source.length()];
|
||||
if (list == 0) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
goto CleanPartialInitialization;
|
||||
}
|
||||
|
||||
// i should initialy be the number of code units at the
|
||||
// start of the string
|
||||
i = U16_LENGTH(source.char32At(0));
|
||||
//int32_t i = 1;
|
||||
// find the segments
|
||||
// This code iterates through the source string and
|
||||
// extracts segments that end up on a codepoint that
|
||||
// doesn't start any decompositions. (Analysis is done
|
||||
// on the NFD form - see above).
|
||||
for (; i < source.length(); i += U16_LENGTH(cp)) {
|
||||
cp = source.char32At(i);
|
||||
if (nfcImpl.isCanonSegmentStarter(cp)) {
|
||||
source.extract(start, i-start, list[list_length++]); // add up to i
|
||||
start = i;
|
||||
}
|
||||
}
|
||||
source.extract(start, i-start, list[list_length++]); // add last one
|
||||
|
||||
|
||||
// allocate the arrays, and find the strings that are CE to each segment
|
||||
pieces = (UnicodeString **)uprv_malloc(list_length * sizeof(UnicodeString *));
|
||||
pieces_length = list_length;
|
||||
pieces_lengths = (int32_t*)uprv_malloc(list_length * sizeof(int32_t));
|
||||
current = (int32_t*)uprv_malloc(list_length * sizeof(int32_t));
|
||||
current_length = list_length;
|
||||
if (pieces == NULL || pieces_lengths == NULL || current == NULL) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
goto CleanPartialInitialization;
|
||||
}
|
||||
|
||||
for (i = 0; i < current_length; i++) {
|
||||
current[i] = 0;
|
||||
}
|
||||
// for each segment, get all the combinations that can produce
|
||||
// it after NFD normalization
|
||||
for (i = 0; i < pieces_length; ++i) {
|
||||
//if (PROGRESS) printf("SEGMENT\n");
|
||||
pieces[i] = getEquivalents(list[i], pieces_lengths[i], status);
|
||||
}
|
||||
|
||||
delete[] list;
|
||||
return;
|
||||
// Common section to cleanup all local variables and reset object variables.
|
||||
CleanPartialInitialization:
|
||||
if (list != NULL) {
|
||||
delete[] list;
|
||||
}
|
||||
cleanPieces();
|
||||
}
|
||||
|
||||
/**
|
||||
* Dumb recursive implementation of permutation.
|
||||
* TODO: optimize
|
||||
* @param source the string to find permutations for
|
||||
* @return the results in a set.
|
||||
*/
|
||||
void U_EXPORT2 CanonicalIterator::permute(UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status) {
|
||||
if(U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
//if (PROGRESS) printf("Permute: %s\n", UToS(Tr(source)));
|
||||
int32_t i = 0;
|
||||
|
||||
// optimization:
|
||||
// if zero or one character, just return a set with it
|
||||
// we check for length < 2 to keep from counting code points all the time
|
||||
if (source.length() <= 2 && source.countChar32() <= 1) {
|
||||
UnicodeString *toPut = new UnicodeString(source);
|
||||
/* test for NULL */
|
||||
if (toPut == 0) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return;
|
||||
}
|
||||
result->put(source, toPut, status);
|
||||
return;
|
||||
}
|
||||
|
||||
// otherwise iterate through the string, and recursively permute all the other characters
|
||||
UChar32 cp;
|
||||
Hashtable subpermute(status);
|
||||
if(U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
subpermute.setValueDeleter(uprv_deleteUObject);
|
||||
|
||||
for (i = 0; i < source.length(); i += U16_LENGTH(cp)) {
|
||||
cp = source.char32At(i);
|
||||
const UHashElement *ne = NULL;
|
||||
int32_t el = UHASH_FIRST;
|
||||
UnicodeString subPermuteString = source;
|
||||
|
||||
// optimization:
|
||||
// if the character is canonical combining class zero,
|
||||
// don't permute it
|
||||
if (skipZeros && i != 0 && u_getCombiningClass(cp) == 0) {
|
||||
//System.out.println("Skipping " + Utility.hex(UTF16.valueOf(source, i)));
|
||||
continue;
|
||||
}
|
||||
|
||||
subpermute.removeAll();
|
||||
|
||||
// see what the permutations of the characters before and after this one are
|
||||
//Hashtable *subpermute = permute(source.substring(0,i) + source.substring(i + UTF16.getCharCount(cp)));
|
||||
permute(subPermuteString.replace(i, U16_LENGTH(cp), NULL, 0), skipZeros, &subpermute, status);
|
||||
/* Test for buffer overflows */
|
||||
if(U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
// The upper replace is destructive. The question is do we have to make a copy, or we don't care about the contents
|
||||
// of source at this point.
|
||||
|
||||
// prefix this character to all of them
|
||||
ne = subpermute.nextElement(el);
|
||||
while (ne != NULL) {
|
||||
UnicodeString *permRes = (UnicodeString *)(ne->value.pointer);
|
||||
UnicodeString *chStr = new UnicodeString(cp);
|
||||
//test for NULL
|
||||
if (chStr == NULL) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return;
|
||||
}
|
||||
chStr->append(*permRes); //*((UnicodeString *)(ne->value.pointer));
|
||||
//if (PROGRESS) printf(" Piece: %s\n", UToS(*chStr));
|
||||
result->put(*chStr, chStr, status);
|
||||
ne = subpermute.nextElement(el);
|
||||
}
|
||||
}
|
||||
//return result;
|
||||
}
|
||||
|
||||
// privates
|
||||
|
||||
// we have a segment, in NFD. Find all the strings that are canonically equivalent to it.
|
||||
UnicodeString* CanonicalIterator::getEquivalents(const UnicodeString &segment, int32_t &result_len, UErrorCode &status) {
|
||||
Hashtable result(status);
|
||||
Hashtable permutations(status);
|
||||
Hashtable basic(status);
|
||||
if (U_FAILURE(status)) {
|
||||
return 0;
|
||||
}
|
||||
result.setValueDeleter(uprv_deleteUObject);
|
||||
permutations.setValueDeleter(uprv_deleteUObject);
|
||||
basic.setValueDeleter(uprv_deleteUObject);
|
||||
|
||||
UChar USeg[256];
|
||||
int32_t segLen = segment.extract(USeg, 256, status);
|
||||
getEquivalents2(&basic, USeg, segLen, status);
|
||||
|
||||
// now get all the permutations
|
||||
// add only the ones that are canonically equivalent
|
||||
// TODO: optimize by not permuting any class zero.
|
||||
|
||||
const UHashElement *ne = NULL;
|
||||
int32_t el = UHASH_FIRST;
|
||||
//Iterator it = basic.iterator();
|
||||
ne = basic.nextElement(el);
|
||||
//while (it.hasNext())
|
||||
while (ne != NULL) {
|
||||
//String item = (String) it.next();
|
||||
UnicodeString item = *((UnicodeString *)(ne->value.pointer));
|
||||
|
||||
permutations.removeAll();
|
||||
permute(item, CANITER_SKIP_ZEROES, &permutations, status);
|
||||
const UHashElement *ne2 = NULL;
|
||||
int32_t el2 = UHASH_FIRST;
|
||||
//Iterator it2 = permutations.iterator();
|
||||
ne2 = permutations.nextElement(el2);
|
||||
//while (it2.hasNext())
|
||||
while (ne2 != NULL) {
|
||||
//String possible = (String) it2.next();
|
||||
//UnicodeString *possible = new UnicodeString(*((UnicodeString *)(ne2->value.pointer)));
|
||||
UnicodeString possible(*((UnicodeString *)(ne2->value.pointer)));
|
||||
UnicodeString attempt;
|
||||
nfd.normalize(possible, attempt, status);
|
||||
|
||||
// TODO: check if operator == is semanticaly the same as attempt.equals(segment)
|
||||
if (attempt==segment) {
|
||||
//if (PROGRESS) printf("Adding Permutation: %s\n", UToS(Tr(*possible)));
|
||||
// TODO: use the hashtable just to catch duplicates - store strings directly (somehow).
|
||||
result.put(possible, new UnicodeString(possible), status); //add(possible);
|
||||
} else {
|
||||
//if (PROGRESS) printf("-Skipping Permutation: %s\n", UToS(Tr(*possible)));
|
||||
}
|
||||
|
||||
ne2 = permutations.nextElement(el2);
|
||||
}
|
||||
ne = basic.nextElement(el);
|
||||
}
|
||||
|
||||
/* Test for buffer overflows */
|
||||
if(U_FAILURE(status)) {
|
||||
return 0;
|
||||
}
|
||||
// convert into a String[] to clean up storage
|
||||
//String[] finalResult = new String[result.size()];
|
||||
UnicodeString *finalResult = NULL;
|
||||
int32_t resultCount;
|
||||
if((resultCount = result.count())) {
|
||||
finalResult = new UnicodeString[resultCount];
|
||||
if (finalResult == 0) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
else {
|
||||
status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return NULL;
|
||||
}
|
||||
//result.toArray(finalResult);
|
||||
result_len = 0;
|
||||
el = UHASH_FIRST;
|
||||
ne = result.nextElement(el);
|
||||
while(ne != NULL) {
|
||||
finalResult[result_len++] = *((UnicodeString *)(ne->value.pointer));
|
||||
ne = result.nextElement(el);
|
||||
}
|
||||
|
||||
|
||||
return finalResult;
|
||||
}
|
||||
|
||||
// Fills fillinResult with the segment itself plus, for every code point in the
// segment that starts some decomposition, each candidate composite followed by
// the equivalents of whatever extract() leaves as the remainder.
// Every stored value is a newly allocated UnicodeString keyed by its contents.
// Returns fillinResult, or NULL if status indicates a failure.
Hashtable *CanonicalIterator::getEquivalents2(Hashtable *fillinResult, const UChar *segment, int32_t segLen, UErrorCode &status) {
    if (U_FAILURE(status)) {
        return NULL;
    }

    // the segment is always equivalent to itself
    UnicodeString whole(segment, segLen);
    fillinResult->put(whole, new UnicodeString(whole), status);

    UnicodeSet starts;

    // cycle through all the characters
    UChar32 cp;
    for (int32_t i = 0; i < segLen; i += U16_LENGTH(cp)) {
        // see if any character is at the start of some decomposition
        U16_GET(segment, 0, i, segLen, cp);
        if (!nfcImpl.getCanonStartSet(cp, starts)) {
            continue;
        }
        // if so, see which decompositions match
        UnicodeSetIterator iter(starts);
        while (iter.next()) {
            UChar32 cp2 = iter.getCodepoint();
            Hashtable remainder(status);
            remainder.setValueDeleter(uprv_deleteUObject);
            if (extract(&remainder, cp2, segment, segLen, i, status) == NULL) {
                continue;
            }

            // there were some matches, so add all the possibilities to the set.
            UnicodeString prefix(segment, i);
            prefix += cp2;

            int32_t el = UHASH_FIRST;
            for (const UHashElement *ne = remainder.nextElement(el);
                    ne != NULL;
                    ne = remainder.nextElement(el)) {
                const UnicodeString *suffix = (UnicodeString *)(ne->value.pointer);
                UnicodeString *candidate = new UnicodeString(prefix);
                if (candidate == 0) {
                    status = U_MEMORY_ALLOCATION_ERROR;
                    return NULL;
                }
                candidate->append(*suffix);
                fillinResult->put(*candidate, candidate, status);
            }
        }
    }

    /* Test for buffer overflows */
    if(U_FAILURE(status)) {
        return NULL;
    }
    return fillinResult;
}
|
||||
|
||||
/**
|
||||
* See if the decomposition of cp2 is at segment starting at segmentPos
|
||||
* (with canonical rearrangment!)
|
||||
* If so, take the remainder, and return the equivalents
|
||||
*/
|
||||
Hashtable *CanonicalIterator::extract(Hashtable *fillinResult, UChar32 comp, const UChar *segment, int32_t segLen, int32_t segmentPos, UErrorCode &status) {
|
||||
//Hashtable *CanonicalIterator::extract(UChar32 comp, const UnicodeString &segment, int32_t segLen, int32_t segmentPos, UErrorCode &status) {
|
||||
//if (PROGRESS) printf(" extract: %s, ", UToS(Tr(UnicodeString(comp))));
|
||||
//if (PROGRESS) printf("%s, %i\n", UToS(Tr(segment)), segmentPos);
|
||||
|
||||
if (U_FAILURE(status)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
UnicodeString temp(comp);
|
||||
int32_t inputLen=temp.length();
|
||||
UnicodeString decompString;
|
||||
nfd.normalize(temp, decompString, status);
|
||||
if (U_FAILURE(status)) {
|
||||
return NULL;
|
||||
}
|
||||
if (decompString.isBogus()) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return NULL;
|
||||
}
|
||||
const UChar *decomp=decompString.getBuffer();
|
||||
int32_t decompLen=decompString.length();
|
||||
|
||||
// See if it matches the start of segment (at segmentPos)
|
||||
UBool ok = FALSE;
|
||||
UChar32 cp;
|
||||
int32_t decompPos = 0;
|
||||
UChar32 decompCp;
|
||||
U16_NEXT(decomp, decompPos, decompLen, decompCp);
|
||||
|
||||
int32_t i = segmentPos;
|
||||
while(i < segLen) {
|
||||
U16_NEXT(segment, i, segLen, cp);
|
||||
|
||||
if (cp == decompCp) { // if equal, eat another cp from decomp
|
||||
|
||||
//if (PROGRESS) printf(" matches: %s\n", UToS(Tr(UnicodeString(cp))));
|
||||
|
||||
if (decompPos == decompLen) { // done, have all decomp characters!
|
||||
temp.append(segment+i, segLen-i);
|
||||
ok = TRUE;
|
||||
break;
|
||||
}
|
||||
U16_NEXT(decomp, decompPos, decompLen, decompCp);
|
||||
} else {
|
||||
//if (PROGRESS) printf(" buffer: %s\n", UToS(Tr(UnicodeString(cp))));
|
||||
|
||||
// brute force approach
|
||||
temp.append(cp);
|
||||
|
||||
/* TODO: optimize
|
||||
// since we know that the classes are monotonically increasing, after zero
|
||||
// e.g. 0 5 7 9 0 3
|
||||
// we can do an optimization
|
||||
// there are only a few cases that work: zero, less, same, greater
|
||||
// if both classes are the same, we fail
|
||||
// if the decomp class < the segment class, we fail
|
||||
|
||||
segClass = getClass(cp);
|
||||
if (decompClass <= segClass) return null;
|
||||
*/
|
||||
}
|
||||
}
|
||||
if (!ok)
|
||||
return NULL; // we failed, characters left over
|
||||
|
||||
//if (PROGRESS) printf("Matches\n");
|
||||
|
||||
if (inputLen == temp.length()) {
|
||||
fillinResult->put(UnicodeString(), new UnicodeString(), status);
|
||||
return fillinResult; // succeed, but no remainder
|
||||
}
|
||||
|
||||
// brute force approach
|
||||
// check to make sure result is canonically equivalent
|
||||
UnicodeString trial;
|
||||
nfd.normalize(temp, trial, status);
|
||||
if(U_FAILURE(status) || trial.compare(segment+segmentPos, segLen - segmentPos) != 0) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return getEquivalents2(fillinResult, temp.getBuffer()+inputLen, temp.length()-inputLen, status);
|
||||
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* #if !UCONFIG_NO_NORMALIZATION */
|
||||
98
UnicodeConverter/icubuilds-mac/icu/icu/common/chariter.cpp
Normal file
98
UnicodeConverter/icubuilds-mac/icu/icu/common/chariter.cpp
Normal file
@ -0,0 +1,98 @@
|
||||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 1999-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
*/
|
||||
|
||||
#include "unicode/chariter.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
// Destructor: nothing to release.
ForwardCharacterIterator::~ForwardCharacterIterator() {
}

// Default constructor: only initializes the UObject base.
ForwardCharacterIterator::ForwardCharacterIterator() : UObject() {
}

// Copy constructor: only copies the UObject base.
ForwardCharacterIterator::ForwardCharacterIterator(const ForwardCharacterIterator &other) : UObject(other) {
}
|
||||
|
||||
|
||||
// Default constructor: zero-length text with every index at 0.
CharacterIterator::CharacterIterator()
    : textLength(0), pos(0), begin(0), end(0)
{}
|
||||
|
||||
// Iterates over [0, length) starting at 0; a negative length becomes 0.
CharacterIterator::CharacterIterator(int32_t length)
    : textLength(length), pos(0), begin(0), end(length)
{
    if (length < 0) {
        textLength = 0;
        end = 0;
    }
}
|
||||
|
||||
// Iterates over [0, length) starting at position; a negative length becomes 0
// and position is clamped into [0, end].
CharacterIterator::CharacterIterator(int32_t length, int32_t position)
    : textLength(length), pos(position), begin(0), end(length)
{
    if (length < 0) {
        textLength = 0;
        end = 0;
    }
    pos = (pos < 0) ? 0 : ((pos > end) ? end : pos);
}
|
||||
|
||||
// Iterates over [textBegin, textEnd) within a text of the given length, starting
// at position. Each value is clamped in turn: length to >= 0, begin into
// [0, textLength], end into [begin, textLength], pos into [begin, end].
CharacterIterator::CharacterIterator(int32_t length, int32_t textBegin, int32_t textEnd, int32_t position)
    : textLength(length), pos(position), begin(textBegin), end(textEnd)
{
    if (textLength < 0) {
        textLength = 0;
    }
    begin = (begin < 0) ? 0 : ((begin > textLength) ? textLength : begin);
    end = (end < begin) ? begin : ((end > textLength) ? textLength : end);
    pos = (pos < begin) ? begin : ((pos > end) ? end : pos);
}
|
||||
|
||||
// Destructor: nothing to release.
CharacterIterator::~CharacterIterator() {
}

// Copy constructor: copies the base and all four index fields.
CharacterIterator::CharacterIterator(const CharacterIterator &that)
    : ForwardCharacterIterator(that),
      textLength(that.textLength),
      pos(that.pos),
      begin(that.begin),
      end(that.end)
{}
|
||||
|
||||
// Assignment: assigns the base part, then copies the four index fields.
CharacterIterator &CharacterIterator::operator=(const CharacterIterator &that) {
    ForwardCharacterIterator::operator=(that);
    begin = that.begin;
    end = that.end;
    pos = that.pos;
    textLength = that.textLength;
    return *this;
}
|
||||
|
||||
// implementing first[32]PostInc() directly in a subclass should be faster
|
||||
// but these implementations make subclassing a little easier
|
||||
UChar
|
||||
CharacterIterator::firstPostInc(void) {
|
||||
setToStart();
|
||||
return nextPostInc();
|
||||
}
|
||||
|
||||
// Moves to the start, then returns next32PostInc().
UChar32 CharacterIterator::first32PostInc() {
    setToStart();
    return next32PostInc();
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
171
UnicodeConverter/icubuilds-mac/icu/icu/common/charstr.cpp
Normal file
171
UnicodeConverter/icubuilds-mac/icu/icu/common/charstr.cpp
Normal file
@ -0,0 +1,171 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2010-2015, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
* file name: charstr.cpp
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2010may19
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "charstr.h"
|
||||
#include "cmemory.h"
|
||||
#include "cstring.h"
|
||||
#include "uinvchar.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
// Replaces this string's contents with a copy of s, including the terminating NUL.
// Does nothing on a failure code, on self-copy, or if the buffer cannot grow.
CharString &CharString::copyFrom(const CharString &s, UErrorCode &errorCode) {
    if (U_FAILURE(errorCode) || this == &s) {
        return *this;
    }
    if (ensureCapacity(s.len + 1, 0, errorCode)) {
        len = s.len;
        uprv_memcpy(buffer.getAlias(), s.buffer.getAlias(), len + 1);
    }
    return *this;
}
|
||||
|
||||
int32_t CharString::lastIndexOf(char c) const {
|
||||
for(int32_t i=len; i>0;) {
|
||||
if(buffer[--i]==c) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Shortens the string to newLength (negative values count as 0).
// Has no effect if newLength is not smaller than the current length.
CharString &CharString::truncate(int32_t newLength) {
    const int32_t target = (newLength < 0) ? 0 : newLength;
    if (target < len) {
        len = target;
        buffer[len] = 0;
    }
    return *this;
}
|
||||
|
||||
// Appends one char and re-terminates; unchanged if capacity cannot be ensured.
CharString &CharString::append(char c, UErrorCode &errorCode) {
    // +2: one for the new char, one for the terminating NUL.
    if (!ensureCapacity(len + 2, 0, errorCode)) {
        return *this;
    }
    buffer[len] = c;
    ++len;
    buffer[len] = 0;
    return *this;
}
|
||||
|
||||
// Appends sLength bytes from s (sLength == -1 means s is NUL-terminated).
// Sets U_ILLEGAL_ARGUMENT_ERROR for sLength < -1 or a NULL s with nonzero length.
// Handles two aliasing cases: s pointing at this string's own append position
// (data already written in place), and s pointing inside this string when a
// reallocation would be needed.
CharString &CharString::append(const char *s, int32_t sLength, UErrorCode &errorCode) {
    if (U_FAILURE(errorCode)) {
        return *this;
    }
    if (sLength < -1 || (s == NULL && sLength != 0)) {
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return *this;
    }
    if (sLength < 0) {
        sLength = uprv_strlen(s);
    }
    if (sLength <= 0) {
        return *this;   // nothing to append
    }

    const char *start = buffer.getAlias();
    const char *limit = start + len;
    const int32_t spare = buffer.getCapacity() - len;

    if (s == limit) {
        // The caller wrote into the getAppendBuffer().
        if (sLength >= spare) {
            // The caller wrote too much.
            errorCode = U_INTERNAL_PROGRAM_ERROR;
        } else {
            len += sLength;
            buffer[len] = 0;
        }
    } else if (start <= s && s < limit && sLength >= spare) {
        // (Part of) this string is appended to itself which requires reallocation,
        // so we have to make a copy of the substring and append that.
        return append(CharString(s, sLength, errorCode), errorCode);
    } else if (ensureCapacity(len + sLength + 1, 0, errorCode)) {
        // Re-fetch the alias: ensureCapacity may have moved the buffer.
        uprv_memcpy(buffer.getAlias() + len, s, sLength);
        len += sLength;
        buffer[len] = 0;
    }
    return *this;
}
|
||||
|
||||
char *CharString::getAppendBuffer(int32_t minCapacity,
|
||||
int32_t desiredCapacityHint,
|
||||
int32_t &resultCapacity,
|
||||
UErrorCode &errorCode) {
|
||||
if(U_FAILURE(errorCode)) {
|
||||
resultCapacity=0;
|
||||
return NULL;
|
||||
}
|
||||
int32_t appendCapacity=buffer.getCapacity()-len-1; // -1 for NUL
|
||||
if(appendCapacity>=minCapacity) {
|
||||
resultCapacity=appendCapacity;
|
||||
return buffer.getAlias()+len;
|
||||
}
|
||||
if(ensureCapacity(len+minCapacity+1, len+desiredCapacityHint+1, errorCode)) {
|
||||
resultCapacity=buffer.getCapacity()-len-1;
|
||||
return buffer.getAlias()+len;
|
||||
}
|
||||
resultCapacity=0;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Appends s converted with US_INV. Sets U_INVARIANT_CONVERSION_ERROR and leaves
// the string unchanged if uprv_isInvariantUnicodeString(s) is false.
CharString &CharString::appendInvariantChars(const UnicodeString &s, UErrorCode &errorCode) {
    if (U_FAILURE(errorCode)) {
        return *this;
    }
    if (!uprv_isInvariantUnicodeString(s)) {
        errorCode = U_INVARIANT_CONVERSION_ERROR;
        return *this;
    }
    if (!ensureCapacity(len + s.length() + 1, 0, errorCode)) {
        return *this;
    }
    char *dest = buffer.getAlias() + len;
    const int32_t destCapacity = buffer.getCapacity() - len;
    len += s.extract(0, 0x7fffffff, dest, destCapacity, US_INV);
    return *this;
}
|
||||
|
||||
// Makes sure the buffer holds at least `capacity` chars. A hint of 0 means
// "capacity plus the current capacity". Tries the hinted size first when it is
// larger than required, then the exact requirement; sets
// U_MEMORY_ALLOCATION_ERROR and returns FALSE if both fail.
UBool CharString::ensureCapacity(int32_t capacity,
                                 int32_t desiredCapacityHint,
                                 UErrorCode &errorCode) {
    if (U_FAILURE(errorCode)) {
        return FALSE;
    }
    if (capacity <= buffer.getCapacity()) {
        return TRUE;    // already large enough
    }
    if (desiredCapacityHint == 0) {
        desiredCapacityHint = capacity + buffer.getCapacity();
    }
    if (desiredCapacityHint > capacity && buffer.resize(desiredCapacityHint, len + 1) != NULL) {
        return TRUE;
    }
    if (buffer.resize(capacity, len + 1) != NULL) {
        return TRUE;
    }
    errorCode = U_MEMORY_ALLOCATION_ERROR;
    return FALSE;
}
|
||||
|
||||
// Appends a path part, first adding U_FILE_SEP_CHAR if this string is non-empty
// and does not already end with a separator. Does nothing if s is empty.
CharString &CharString::appendPathPart(const StringPiece &s, UErrorCode &errorCode) {
    if (U_FAILURE(errorCode) || s.length() == 0) {
        return *this;
    }
    if (len > 0) {
        const char last = buffer[len - 1];
        if (last != U_FILE_SEP_CHAR && last != U_FILE_ALT_SEP_CHAR) {
            append(U_FILE_SEP_CHAR, errorCode);
        }
    }
    append(s, errorCode);
    return *this;
}
|
||||
|
||||
// Appends U_FILE_SEP_CHAR unless the string is empty or already ends with
// U_FILE_SEP_CHAR or U_FILE_ALT_SEP_CHAR.
CharString &CharString::ensureEndsWithFileSeparator(UErrorCode &errorCode) {
    if (U_FAILURE(errorCode) || len <= 0) {
        return *this;
    }
    const char last = buffer[len - 1];
    if (last != U_FILE_SEP_CHAR && last != U_FILE_ALT_SEP_CHAR) {
        append(U_FILE_SEP_CHAR, errorCode);
    }
    return *this;
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
139
UnicodeConverter/icubuilds-mac/icu/icu/common/charstr.h
Normal file
139
UnicodeConverter/icubuilds-mac/icu/icu/common/charstr.h
Normal file
@ -0,0 +1,139 @@
|
||||
/*
|
||||
**********************************************************************
|
||||
* Copyright (c) 2001-2015, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* Date Name Description
|
||||
* 11/19/2001 aliu Creation.
|
||||
* 05/19/2010 markus Rewritten from scratch
|
||||
**********************************************************************
|
||||
*/
|
||||
|
||||
#ifndef CHARSTRING_H
|
||||
#define CHARSTRING_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/unistr.h"
|
||||
#include "unicode/uobject.h"
|
||||
#include "cmemory.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
// Windows needs us to DLL-export the MaybeStackArray template specialization,
|
||||
// but MacOS X cannot handle it. Same as in digitlst.h.
|
||||
#if !U_PLATFORM_IS_DARWIN_BASED
|
||||
template class U_COMMON_API MaybeStackArray<char, 40>;
|
||||
#endif
|
||||
|
||||
/**
|
||||
* ICU-internal char * string class.
|
||||
* This class does not assume or enforce any particular character encoding.
|
||||
* Raw bytes can be stored. The string object owns its characters.
|
||||
* A terminating NUL is stored, but the class does not prevent embedded NUL characters.
|
||||
*
|
||||
* This class wants to be convenient but is also deliberately minimalist.
|
||||
* Please do not add methods if they only add minor convenience.
|
||||
* For example:
|
||||
* cs.data()[5]='a'; // no need for setCharAt(5, 'a')
|
||||
*/
|
||||
class U_COMMON_API CharString : public UMemory {
|
||||
public:
|
||||
CharString() : len(0) { buffer[0]=0; }
|
||||
CharString(const StringPiece &s, UErrorCode &errorCode) : len(0) {
|
||||
buffer[0]=0;
|
||||
append(s, errorCode);
|
||||
}
|
||||
CharString(const CharString &s, UErrorCode &errorCode) : len(0) {
|
||||
buffer[0]=0;
|
||||
append(s, errorCode);
|
||||
}
|
||||
CharString(const char *s, int32_t sLength, UErrorCode &errorCode) : len(0) {
|
||||
buffer[0]=0;
|
||||
append(s, sLength, errorCode);
|
||||
}
|
||||
~CharString() {}
|
||||
|
||||
/**
|
||||
* Replaces this string's contents with the other string's contents.
|
||||
* CharString does not support the standard copy constructor nor
|
||||
* the assignment operator, to make copies explicit and to
|
||||
* use a UErrorCode where memory allocations might be needed.
|
||||
*/
|
||||
CharString ©From(const CharString &other, UErrorCode &errorCode);
|
||||
|
||||
UBool isEmpty() const { return len==0; }
|
||||
int32_t length() const { return len; }
|
||||
char operator[](int32_t index) const { return buffer[index]; }
|
||||
StringPiece toStringPiece() const { return StringPiece(buffer.getAlias(), len); }
|
||||
|
||||
const char *data() const { return buffer.getAlias(); }
|
||||
char *data() { return buffer.getAlias(); }
|
||||
|
||||
/** @return last index of c, or -1 if c is not in this string */
|
||||
int32_t lastIndexOf(char c) const;
|
||||
|
||||
CharString &clear() { len=0; buffer[0]=0; return *this; }
|
||||
CharString &truncate(int32_t newLength);
|
||||
|
||||
CharString &append(char c, UErrorCode &errorCode);
|
||||
CharString &append(const StringPiece &s, UErrorCode &errorCode) {
|
||||
return append(s.data(), s.length(), errorCode);
|
||||
}
|
||||
CharString &append(const CharString &s, UErrorCode &errorCode) {
|
||||
return append(s.data(), s.length(), errorCode);
|
||||
}
|
||||
CharString &append(const char *s, int32_t sLength, UErrorCode &status);
|
||||
/**
|
||||
* Returns a writable buffer for appending and writes the buffer's capacity to
|
||||
* resultCapacity. Guarantees resultCapacity>=minCapacity if U_SUCCESS().
|
||||
* There will additionally be space for a terminating NUL right at resultCapacity.
|
||||
* (This function is similar to ByteSink.GetAppendBuffer().)
|
||||
*
|
||||
* The returned buffer is only valid until the next write operation
|
||||
* on this string.
|
||||
*
|
||||
* After writing at most resultCapacity bytes, call append() with the
|
||||
* pointer returned from this function and the number of bytes written.
|
||||
*
|
||||
* @param minCapacity required minimum capacity of the returned buffer;
|
||||
* must be non-negative
|
||||
* @param desiredCapacityHint desired capacity of the returned buffer;
|
||||
* must be non-negative
|
||||
* @param resultCapacity will be set to the capacity of the returned buffer
|
||||
* @param errorCode in/out error code
|
||||
* @return a buffer with resultCapacity>=min_capacity
|
||||
*/
|
||||
char *getAppendBuffer(int32_t minCapacity,
|
||||
int32_t desiredCapacityHint,
|
||||
int32_t &resultCapacity,
|
||||
UErrorCode &errorCode);
|
||||
|
||||
CharString &appendInvariantChars(const UnicodeString &s, UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Appends a filename/path part, e.g., a directory name.
|
||||
* First appends a U_FILE_SEP_CHAR if necessary.
|
||||
* Does nothing if s is empty.
|
||||
*/
|
||||
CharString &appendPathPart(const StringPiece &s, UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Appends a U_FILE_SEP_CHAR if this string is not empty
|
||||
* and does not already end with a U_FILE_SEP_CHAR or U_FILE_ALT_SEP_CHAR.
|
||||
*/
|
||||
CharString &ensureEndsWithFileSeparator(UErrorCode &errorCode);
|
||||
|
||||
private:
|
||||
MaybeStackArray<char, 40> buffer;
|
||||
int32_t len;
|
||||
|
||||
UBool ensureCapacity(int32_t capacity, int32_t desiredCapacityHint, UErrorCode &errorCode);
|
||||
|
||||
CharString(const CharString &other); // forbid copying of this class
|
||||
CharString &operator=(const CharString &other); // forbid copying of this class
|
||||
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif
|
||||
//eof
|
||||
160
UnicodeConverter/icubuilds-mac/icu/icu/common/cmemory.c
Normal file
160
UnicodeConverter/icubuilds-mac/icu/icu/common/cmemory.c
Normal file
@ -0,0 +1,160 @@
|
||||
/*
|
||||
******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2002-2015, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
******************************************************************************
|
||||
*
|
||||
* File cmemory.c ICU Heap allocation.
|
||||
* All ICU heap allocation, both for C and C++ new of ICU
|
||||
* class types, comes through these functions.
|
||||
*
|
||||
* If you have a need to replace ICU allocation, this is the
|
||||
* place to do it.
|
||||
*
|
||||
* Note that uprv_malloc(0) returns a non-NULL pointer, and
|
||||
* that a subsequent free of that pointer value is a NOP.
|
||||
*
|
||||
******************************************************************************
|
||||
*/
|
||||
#include "unicode/uclean.h"
|
||||
#include "cmemory.h"
|
||||
#include "putilimp.h"
|
||||
#include "uassert.h"
|
||||
#include <stdlib.h>
|
||||
|
||||
/* uprv_malloc(0) returns a pointer to this read-only data. */
|
||||
static const int32_t zeroMem[] = {0, 0, 0, 0, 0, 0};
|
||||
|
||||
/* Function Pointers for user-supplied heap functions */
|
||||
static const void *pContext;
|
||||
static UMemAllocFn *pAlloc;
|
||||
static UMemReallocFn *pRealloc;
|
||||
static UMemFreeFn *pFree;
|
||||
|
||||
#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
|
||||
#include <stdio.h>
|
||||
static int n=0;
|
||||
static long b=0;
|
||||
#endif
|
||||
|
||||
#if U_DEBUG
|
||||
|
||||
static char gValidMemorySink = 0;
|
||||
|
||||
U_CAPI void uprv_checkValidMemory(const void *p, size_t n) {
|
||||
/*
|
||||
* Access the memory to ensure that it's all valid.
|
||||
* Load and save a computed value to try to ensure that the compiler
|
||||
* does not throw away the whole loop.
|
||||
* A thread analyzer might complain about un-mutexed access to gValidMemorySink
|
||||
* which is true but harmless because no one ever uses the value in gValidMemorySink.
|
||||
*/
|
||||
const char *s = (const char *)p;
|
||||
char c = gValidMemorySink;
|
||||
size_t i;
|
||||
U_ASSERT(p != NULL);
|
||||
for(i = 0; i < n; ++i) {
|
||||
c ^= s[i];
|
||||
}
|
||||
gValidMemorySink = c;
|
||||
}
|
||||
|
||||
#endif /* U_DEBUG */
|
||||
|
||||
U_CAPI void * U_EXPORT2
|
||||
uprv_malloc(size_t s) {
|
||||
#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
|
||||
#if 1
|
||||
putchar('>');
|
||||
fflush(stdout);
|
||||
#else
|
||||
fprintf(stderr,"MALLOC\t#%d\t%ul bytes\t%ul total\n", ++n,s,(b+=s)); fflush(stderr);
|
||||
#endif
|
||||
#endif
|
||||
if (s > 0) {
|
||||
if (pAlloc) {
|
||||
return (*pAlloc)(pContext, s);
|
||||
} else {
|
||||
return uprv_default_malloc(s);
|
||||
}
|
||||
} else {
|
||||
return (void *)zeroMem;
|
||||
}
|
||||
}
|
||||
|
||||
U_CAPI void * U_EXPORT2
|
||||
uprv_realloc(void * buffer, size_t size) {
|
||||
#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
|
||||
putchar('~');
|
||||
fflush(stdout);
|
||||
#endif
|
||||
if (buffer == zeroMem) {
|
||||
return uprv_malloc(size);
|
||||
} else if (size == 0) {
|
||||
if (pFree) {
|
||||
(*pFree)(pContext, buffer);
|
||||
} else {
|
||||
uprv_default_free(buffer);
|
||||
}
|
||||
return (void *)zeroMem;
|
||||
} else {
|
||||
if (pRealloc) {
|
||||
return (*pRealloc)(pContext, buffer, size);
|
||||
} else {
|
||||
return uprv_default_realloc(buffer, size);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
uprv_free(void *buffer) {
|
||||
#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
|
||||
putchar('<');
|
||||
fflush(stdout);
|
||||
#endif
|
||||
if (buffer != zeroMem) {
|
||||
if (pFree) {
|
||||
(*pFree)(pContext, buffer);
|
||||
} else {
|
||||
uprv_default_free(buffer);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
U_CAPI void * U_EXPORT2
|
||||
uprv_calloc(size_t num, size_t size) {
|
||||
void *mem = NULL;
|
||||
size *= num;
|
||||
mem = uprv_malloc(size);
|
||||
if (mem) {
|
||||
uprv_memset(mem, 0, size);
|
||||
}
|
||||
return mem;
|
||||
}
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
u_setMemoryFunctions(const void *context, UMemAllocFn *a, UMemReallocFn *r, UMemFreeFn *f, UErrorCode *status)
|
||||
{
|
||||
if (U_FAILURE(*status)) {
|
||||
return;
|
||||
}
|
||||
if (a==NULL || r==NULL || f==NULL) {
|
||||
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return;
|
||||
}
|
||||
pContext = context;
|
||||
pAlloc = a;
|
||||
pRealloc = r;
|
||||
pFree = f;
|
||||
}
|
||||
|
||||
|
||||
/* Resets the user heap functions and context to NULL; always returns TRUE. */
U_CFUNC UBool cmemory_cleanup(void) {
    pFree    = NULL;
    pRealloc = NULL;
    pAlloc   = NULL;
    pContext = NULL;
    return TRUE;
}
|
||||
651
UnicodeConverter/icubuilds-mac/icu/icu/common/cmemory.h
Normal file
651
UnicodeConverter/icubuilds-mac/icu/icu/common/cmemory.h
Normal file
@ -0,0 +1,651 @@
|
||||
/*
|
||||
******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1997-2015, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
******************************************************************************
|
||||
*
|
||||
* File CMEMORY.H
|
||||
*
|
||||
* Contains stdlib.h/string.h memory functions
|
||||
*
|
||||
* @author Bertrand A. Damiba
|
||||
*
|
||||
* Modification History:
|
||||
*
|
||||
* Date Name Description
|
||||
* 6/20/98 Bertrand Created.
|
||||
* 05/03/99 stephen Changed from functions to macros.
|
||||
*
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef CMEMORY_H
|
||||
#define CMEMORY_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
#include "unicode/localpointer.h"
|
||||
|
||||
#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
|
||||
#include <stdio.h>
|
||||
#endif
|
||||
|
||||
#if U_DEBUG
|
||||
|
||||
/*
|
||||
* The C++ standard requires that the source pointer for memcpy() & memmove()
|
||||
* is valid, not NULL, and not at the end of an allocated memory block.
|
||||
* In debug mode, we read one byte from the source point to verify that it's
|
||||
* a valid, readable pointer.
|
||||
*/
|
||||
|
||||
U_CAPI void uprv_checkValidMemory(const void *p, size_t n);
|
||||
|
||||
#define uprv_memcpy(dst, src, size) ( \
|
||||
uprv_checkValidMemory(src, 1), \
|
||||
U_STANDARD_CPP_NAMESPACE memcpy(dst, src, size))
|
||||
#define uprv_memmove(dst, src, size) ( \
|
||||
uprv_checkValidMemory(src, 1), \
|
||||
U_STANDARD_CPP_NAMESPACE memmove(dst, src, size))
|
||||
|
||||
#else
|
||||
|
||||
#define uprv_memcpy(dst, src, size) U_STANDARD_CPP_NAMESPACE memcpy(dst, src, size)
|
||||
#define uprv_memmove(dst, src, size) U_STANDARD_CPP_NAMESPACE memmove(dst, src, size)
|
||||
|
||||
#endif /* U_DEBUG */
|
||||
|
||||
/**
|
||||
* \def UPRV_LENGTHOF
|
||||
* Convenience macro to determine the length of a fixed array at compile-time.
|
||||
* @param array A fixed length array
|
||||
* @return The length of the array, in elements
|
||||
* @internal
|
||||
*/
|
||||
#define UPRV_LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
|
||||
#define uprv_memset(buffer, mark, size) U_STANDARD_CPP_NAMESPACE memset(buffer, mark, size)
|
||||
#define uprv_memcmp(buffer1, buffer2, size) U_STANDARD_CPP_NAMESPACE memcmp(buffer1, buffer2,size)
|
||||
|
||||
U_CAPI void * U_EXPORT2
|
||||
uprv_malloc(size_t s) U_MALLOC_ATTR U_ALLOC_SIZE_ATTR(1);
|
||||
|
||||
U_CAPI void * U_EXPORT2
|
||||
uprv_realloc(void *mem, size_t size) U_ALLOC_SIZE_ATTR(2);
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
uprv_free(void *mem);
|
||||
|
||||
U_CAPI void * U_EXPORT2
|
||||
uprv_calloc(size_t num, size_t size) U_MALLOC_ATTR U_ALLOC_SIZE_ATTR2(1,2);
|
||||
|
||||
/**
|
||||
* This should align the memory properly on any machine.
|
||||
* This is very useful for the safeClone functions.
|
||||
*/
|
||||
typedef union {
|
||||
long t1;
|
||||
double t2;
|
||||
void *t3;
|
||||
} UAlignedMemory;
|
||||
|
||||
/**
|
||||
* Get the least significant bits of a pointer (a memory address).
|
||||
* For example, with a mask of 3, the macro gets the 2 least significant bits,
|
||||
* which will be 0 if the pointer is 32-bit (4-byte) aligned.
|
||||
*
|
||||
* ptrdiff_t is the most appropriate integer type to cast to.
|
||||
* size_t should work too, since on most (or all?) platforms it has the same
|
||||
* width as ptrdiff_t.
|
||||
*/
|
||||
#define U_POINTER_MASK_LSB(ptr, mask) (((ptrdiff_t)(char *)(ptr)) & (mask))
|
||||
|
||||
/**
|
||||
* Get the amount of bytes that a pointer is off by from
|
||||
* the previous UAlignedMemory-aligned pointer.
|
||||
*/
|
||||
#define U_ALIGNMENT_OFFSET(ptr) U_POINTER_MASK_LSB(ptr, sizeof(UAlignedMemory) - 1)
|
||||
|
||||
/**
|
||||
* Get the amount of bytes to add to a pointer
|
||||
* in order to get the next UAlignedMemory-aligned address.
|
||||
*/
|
||||
#define U_ALIGNMENT_OFFSET_UP(ptr) (sizeof(UAlignedMemory) - U_ALIGNMENT_OFFSET(ptr))
|
||||
|
||||
/**
|
||||
* Heap clean up function, called from u_cleanup()
|
||||
* Clears any user heap functions from u_setMemoryFunctions()
|
||||
* Does NOT deallocate any remaining allocated memory.
|
||||
*/
|
||||
U_CFUNC UBool
|
||||
cmemory_cleanup(void);
|
||||
|
||||
/**
|
||||
* A function called by <TT>uhash_remove</TT>,
|
||||
* <TT>uhash_close</TT>, or <TT>uhash_put</TT> to delete
|
||||
* an existing key or value.
|
||||
* @param obj A key or value stored in a hashtable
|
||||
* @see uprv_deleteUObject
|
||||
*/
|
||||
typedef void U_CALLCONV UObjectDeleter(void* obj);
|
||||
|
||||
/**
|
||||
* Deleter for UObject instances.
|
||||
* Works for all subclasses of UObject because it has a virtual destructor.
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
uprv_deleteUObject(void *obj);
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/**
|
||||
* "Smart pointer" class, deletes memory via uprv_free().
|
||||
* For most methods see the LocalPointerBase base class.
|
||||
* Adds operator[] for array item access.
|
||||
*
|
||||
* @see LocalPointerBase
|
||||
*/
|
||||
template<typename T>
|
||||
class LocalMemory : public LocalPointerBase<T> {
|
||||
public:
|
||||
/**
|
||||
* Constructor takes ownership.
|
||||
* @param p simple pointer to an array of T items that is adopted
|
||||
*/
|
||||
explicit LocalMemory(T *p=NULL) : LocalPointerBase<T>(p) {}
|
||||
#if U_HAVE_RVALUE_REFERENCES
|
||||
/**
|
||||
* Move constructor, leaves src with isNull().
|
||||
* @param src source smart pointer
|
||||
*/
|
||||
LocalMemory(LocalMemory<T> &&src) U_NOEXCEPT : LocalPointerBase<T>(src.ptr) {
|
||||
src.ptr=NULL;
|
||||
}
|
||||
#endif
|
||||
/**
|
||||
* Destructor deletes the memory it owns.
|
||||
*/
|
||||
~LocalMemory() {
|
||||
uprv_free(LocalPointerBase<T>::ptr);
|
||||
}
|
||||
#if U_HAVE_RVALUE_REFERENCES
|
||||
/**
|
||||
* Move assignment operator, leaves src with isNull().
|
||||
* The behavior is undefined if *this and src are the same object.
|
||||
* @param src source smart pointer
|
||||
* @return *this
|
||||
*/
|
||||
LocalMemory<T> &operator=(LocalMemory<T> &&src) U_NOEXCEPT {
|
||||
return moveFrom(src);
|
||||
}
|
||||
#endif
|
||||
/**
|
||||
* Move assignment, leaves src with isNull().
|
||||
* The behavior is undefined if *this and src are the same object.
|
||||
*
|
||||
* Can be called explicitly, does not need C++11 support.
|
||||
* @param src source smart pointer
|
||||
* @return *this
|
||||
*/
|
||||
LocalMemory<T> &moveFrom(LocalMemory<T> &src) U_NOEXCEPT {
|
||||
delete[] LocalPointerBase<T>::ptr;
|
||||
LocalPointerBase<T>::ptr=src.ptr;
|
||||
src.ptr=NULL;
|
||||
return *this;
|
||||
}
|
||||
/**
|
||||
* Swap pointers.
|
||||
* @param other other smart pointer
|
||||
*/
|
||||
void swap(LocalMemory<T> &other) U_NOEXCEPT {
|
||||
T *temp=LocalPointerBase<T>::ptr;
|
||||
LocalPointerBase<T>::ptr=other.ptr;
|
||||
other.ptr=temp;
|
||||
}
|
||||
/**
|
||||
* Non-member LocalMemory swap function.
|
||||
* @param p1 will get p2's pointer
|
||||
* @param p2 will get p1's pointer
|
||||
*/
|
||||
friend inline void swap(LocalMemory<T> &p1, LocalMemory<T> &p2) U_NOEXCEPT {
|
||||
p1.swap(p2);
|
||||
}
|
||||
/**
|
||||
* Deletes the array it owns,
|
||||
* and adopts (takes ownership of) the one passed in.
|
||||
* @param p simple pointer to an array of T items that is adopted
|
||||
*/
|
||||
void adoptInstead(T *p) {
    // Release the block owned so far, then take ownership of p.
    uprv_free(this->ptr);
    this->ptr=p;
}
|
||||
/**
|
||||
* Deletes the array it owns, allocates a new one and reset its bytes to 0.
|
||||
* Returns the new array pointer.
|
||||
* If the allocation fails, then the current array is unchanged and
|
||||
* this method returns NULL.
|
||||
* @param newCapacity must be >0
|
||||
* @return the allocated array pointer, or NULL if the allocation failed
|
||||
*/
|
||||
inline T *allocateInsteadAndReset(int32_t newCapacity=1);
|
||||
/**
|
||||
* Deletes the array it owns and allocates a new one, copying length T items.
|
||||
* Returns the new array pointer.
|
||||
* If the allocation fails, then the current array is unchanged and
|
||||
* this method returns NULL.
|
||||
* @param newCapacity must be >0
|
||||
* @param length number of T items to be copied from the old array to the new one;
|
||||
* must be no more than the capacity of the old array,
|
||||
* which the caller must track because the LocalMemory does not track it
|
||||
* @return the allocated array pointer, or NULL if the allocation failed
|
||||
*/
|
||||
inline T *allocateInsteadAndCopy(int32_t newCapacity=1, int32_t length=0);
|
||||
/**
|
||||
* Array item access (writable).
|
||||
* No index bounds check.
|
||||
* @param i array index
|
||||
* @return reference to the array item
|
||||
*/
|
||||
T &operator[](ptrdiff_t i) const { return *(this->ptr+i); }
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
inline T *LocalMemory<T>::allocateInsteadAndReset(int32_t newCapacity) {
|
||||
if(newCapacity>0) {
|
||||
T *p=(T *)uprv_malloc(newCapacity*sizeof(T));
|
||||
if(p!=NULL) {
|
||||
uprv_memset(p, 0, newCapacity*sizeof(T));
|
||||
uprv_free(LocalPointerBase<T>::ptr);
|
||||
LocalPointerBase<T>::ptr=p;
|
||||
}
|
||||
return p;
|
||||
} else {
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template<typename T>
|
||||
inline T *LocalMemory<T>::allocateInsteadAndCopy(int32_t newCapacity, int32_t length) {
|
||||
if(newCapacity>0) {
|
||||
T *p=(T *)uprv_malloc(newCapacity*sizeof(T));
|
||||
if(p!=NULL) {
|
||||
if(length>0) {
|
||||
if(length>newCapacity) {
|
||||
length=newCapacity;
|
||||
}
|
||||
uprv_memcpy(p, LocalPointerBase<T>::ptr, length*sizeof(T));
|
||||
}
|
||||
uprv_free(LocalPointerBase<T>::ptr);
|
||||
LocalPointerBase<T>::ptr=p;
|
||||
}
|
||||
return p;
|
||||
} else {
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Simple array/buffer management class using uprv_malloc() and uprv_free().
|
||||
* Provides an internal array with fixed capacity. Can alias another array
|
||||
* or allocate one.
|
||||
*
|
||||
* The array address is properly aligned for type T. It might not be properly
|
||||
* aligned for types larger than T (or larger than the largest subtype of T).
|
||||
*
|
||||
* Unlike LocalMemory and LocalArray, this class never adopts
|
||||
* (takes ownership of) another array.
|
||||
*/
|
||||
template<typename T, int32_t stackCapacity>
|
||||
class MaybeStackArray {
|
||||
public:
|
||||
/**
|
||||
* Default constructor initializes with internal T[stackCapacity] buffer.
|
||||
*/
|
||||
MaybeStackArray() : ptr(stackArray), capacity(stackCapacity), needToRelease(FALSE) {}
|
||||
/**
|
||||
* Destructor deletes the array (if owned).
|
||||
*/
|
||||
~MaybeStackArray() { releaseArray(); }
|
||||
/**
|
||||
* Returns the array capacity (number of T items).
|
||||
* @return array capacity
|
||||
*/
|
||||
int32_t getCapacity() const { return capacity; }
|
||||
/**
|
||||
* Access without ownership change.
|
||||
* @return the array pointer
|
||||
*/
|
||||
T *getAlias() const { return ptr; }
|
||||
/**
|
||||
* Returns the array limit. Simple convenience method.
|
||||
* @return getAlias()+getCapacity()
|
||||
*/
|
||||
T *getArrayLimit() const { return getAlias()+capacity; }
|
||||
// No "operator T *() const" because that can make
|
||||
// expressions like mbs[index] ambiguous for some compilers.
|
||||
/**
|
||||
* Array item access (const).
|
||||
* No index bounds check.
|
||||
* @param i array index
|
||||
* @return reference to the array item
|
||||
*/
|
||||
const T &operator[](ptrdiff_t i) const { return ptr[i]; }
|
||||
/**
|
||||
* Array item access (writable).
|
||||
* No index bounds check.
|
||||
* @param i array index
|
||||
* @return reference to the array item
|
||||
*/
|
||||
T &operator[](ptrdiff_t i) { return ptr[i]; }
|
||||
/**
|
||||
* Deletes the array (if owned) and aliases another one, no transfer of ownership.
|
||||
* If the arguments are illegal, then the current array is unchanged.
|
||||
* @param otherArray must not be NULL
|
||||
* @param otherCapacity must be >0
|
||||
*/
|
||||
void aliasInstead(T *otherArray, int32_t otherCapacity) {
|
||||
if(otherArray!=NULL && otherCapacity>0) {
|
||||
releaseArray();
|
||||
ptr=otherArray;
|
||||
capacity=otherCapacity;
|
||||
needToRelease=FALSE;
|
||||
}
|
||||
}
|
||||
/**
|
||||
* Deletes the array (if owned) and allocates a new one, copying length T items.
|
||||
* Returns the new array pointer.
|
||||
* If the allocation fails, then the current array is unchanged and
|
||||
* this method returns NULL.
|
||||
* @param newCapacity can be less than or greater than the current capacity;
|
||||
* must be >0
|
||||
* @param length number of T items to be copied from the old array to the new one
|
||||
* @return the allocated array pointer, or NULL if the allocation failed
|
||||
*/
|
||||
inline T *resize(int32_t newCapacity, int32_t length=0);
|
||||
/**
|
||||
* Gives up ownership of the array if owned, or else clones it,
|
||||
* copying length T items; resets itself to the internal stack array.
|
||||
* Returns NULL if the allocation failed.
|
||||
* @param length number of T items to copy when cloning,
|
||||
* and capacity of the clone when cloning
|
||||
* @param resultCapacity will be set to the returned array's capacity (output-only)
|
||||
* @return the array pointer;
|
||||
* caller becomes responsible for deleting the array
|
||||
*/
|
||||
inline T *orphanOrClone(int32_t length, int32_t &resultCapacity);
|
||||
private:
|
||||
T *ptr;
|
||||
int32_t capacity;
|
||||
UBool needToRelease;
|
||||
T stackArray[stackCapacity];
|
||||
void releaseArray() {
|
||||
if(needToRelease) {
|
||||
uprv_free(ptr);
|
||||
}
|
||||
}
|
||||
/* No comparison operators with other MaybeStackArray's. */
|
||||
bool operator==(const MaybeStackArray & /*other*/) {return FALSE;}
|
||||
bool operator!=(const MaybeStackArray & /*other*/) {return TRUE;}
|
||||
/* No ownership transfer: No copy constructor, no assignment operator. */
|
||||
MaybeStackArray(const MaybeStackArray & /*other*/) {}
|
||||
void operator=(const MaybeStackArray & /*other*/) {}
|
||||
|
||||
// No heap allocation. Use only on the stack.
|
||||
// (Declaring these functions private triggers a cascade of problems:
|
||||
// MSVC insists on exporting an instantiation of MaybeStackArray, which
|
||||
// requires that all functions be defined.
|
||||
// An empty implementation of new() is rejected, it must return a value.
|
||||
// Returning NULL is rejected by gcc for operator new.
|
||||
// The expedient thing is just not to override operator new.
|
||||
// While relatively pointless, heap allocated instances will function.
|
||||
// static void * U_EXPORT2 operator new(size_t size);
|
||||
// static void * U_EXPORT2 operator new[](size_t size);
|
||||
#if U_HAVE_PLACEMENT_NEW
|
||||
// static void * U_EXPORT2 operator new(size_t, void *ptr);
|
||||
#endif
|
||||
};
|
||||
|
||||
template<typename T, int32_t stackCapacity>
|
||||
inline T *MaybeStackArray<T, stackCapacity>::resize(int32_t newCapacity, int32_t length) {
|
||||
if(newCapacity>0) {
|
||||
#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
|
||||
::fprintf(::stderr,"MaybeStacArray (resize) alloc %d * %lu\n", newCapacity,sizeof(T));
|
||||
#endif
|
||||
T *p=(T *)uprv_malloc(newCapacity*sizeof(T));
|
||||
if(p!=NULL) {
|
||||
if(length>0) {
|
||||
if(length>capacity) {
|
||||
length=capacity;
|
||||
}
|
||||
if(length>newCapacity) {
|
||||
length=newCapacity;
|
||||
}
|
||||
uprv_memcpy(p, ptr, length*sizeof(T));
|
||||
}
|
||||
releaseArray();
|
||||
ptr=p;
|
||||
capacity=newCapacity;
|
||||
needToRelease=TRUE;
|
||||
}
|
||||
return p;
|
||||
} else {
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, int32_t stackCapacity>
|
||||
inline T *MaybeStackArray<T, stackCapacity>::orphanOrClone(int32_t length, int32_t &resultCapacity) {
|
||||
T *p;
|
||||
if(needToRelease) {
|
||||
p=ptr;
|
||||
} else if(length<=0) {
|
||||
return NULL;
|
||||
} else {
|
||||
if(length>capacity) {
|
||||
length=capacity;
|
||||
}
|
||||
p=(T *)uprv_malloc(length*sizeof(T));
|
||||
#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
|
||||
::fprintf(::stderr,"MaybeStacArray (orphan) alloc %d * %lu\n", length,sizeof(T));
|
||||
#endif
|
||||
if(p==NULL) {
|
||||
return NULL;
|
||||
}
|
||||
uprv_memcpy(p, ptr, length*sizeof(T));
|
||||
}
|
||||
resultCapacity=length;
|
||||
ptr=stackArray;
|
||||
capacity=stackCapacity;
|
||||
needToRelease=FALSE;
|
||||
return p;
|
||||
}
|
||||
|
||||
/**
|
||||
* Variant of MaybeStackArray that allocates a header struct and an array
|
||||
* in one contiguous memory block, using uprv_malloc() and uprv_free().
|
||||
* Provides internal memory with fixed array capacity. Can alias another memory
|
||||
* block or allocate one.
|
||||
* The stackCapacity is the number of T items in the internal memory,
|
||||
* not counting the H header.
|
||||
* Unlike LocalMemory and LocalArray, this class never adopts
|
||||
* (takes ownership of) another memory block.
|
||||
*/
|
||||
template<typename H, typename T, int32_t stackCapacity>
|
||||
class MaybeStackHeaderAndArray {
|
||||
public:
|
||||
/**
|
||||
* Default constructor initializes with internal H+T[stackCapacity] buffer.
|
||||
*/
|
||||
MaybeStackHeaderAndArray() : ptr(&stackHeader), capacity(stackCapacity), needToRelease(FALSE) {}
|
||||
/**
|
||||
* Destructor deletes the memory (if owned).
|
||||
*/
|
||||
~MaybeStackHeaderAndArray() { releaseMemory(); }
|
||||
/**
|
||||
* Returns the array capacity (number of T items).
|
||||
* @return array capacity
|
||||
*/
|
||||
int32_t getCapacity() const { return capacity; }
|
||||
/**
|
||||
* Access without ownership change.
|
||||
* @return the header pointer
|
||||
*/
|
||||
H *getAlias() const { return ptr; }
|
||||
/**
|
||||
* Returns the array start.
|
||||
* @return array start, same address as getAlias()+1
|
||||
*/
|
||||
T *getArrayStart() const { return reinterpret_cast<T *>(getAlias()+1); }
|
||||
/**
|
||||
* Returns the array limit.
|
||||
* @return array limit
|
||||
*/
|
||||
T *getArrayLimit() const { return getArrayStart()+capacity; }
|
||||
/**
|
||||
* Access without ownership change. Same as getAlias().
|
||||
* A class instance can be used directly in expressions that take a T *.
|
||||
* @return the header pointer
|
||||
*/
|
||||
operator H *() const { return ptr; }
|
||||
/**
|
||||
* Array item access (writable).
|
||||
* No index bounds check.
|
||||
* @param i array index
|
||||
* @return reference to the array item
|
||||
*/
|
||||
T &operator[](ptrdiff_t i) { return getArrayStart()[i]; }
|
||||
/**
|
||||
* Deletes the memory block (if owned) and aliases another one, no transfer of ownership.
|
||||
* If the arguments are illegal, then the current memory is unchanged.
|
||||
* @param otherArray must not be NULL
|
||||
* @param otherCapacity must be >0
|
||||
*/
|
||||
void aliasInstead(H *otherMemory, int32_t otherCapacity) {
|
||||
if(otherMemory!=NULL && otherCapacity>0) {
|
||||
releaseMemory();
|
||||
ptr=otherMemory;
|
||||
capacity=otherCapacity;
|
||||
needToRelease=FALSE;
|
||||
}
|
||||
}
|
||||
/**
|
||||
* Deletes the memory block (if owned) and allocates a new one,
|
||||
* copying the header and length T array items.
|
||||
* Returns the new header pointer.
|
||||
* If the allocation fails, then the current memory is unchanged and
|
||||
* this method returns NULL.
|
||||
* @param newCapacity can be less than or greater than the current capacity;
|
||||
* must be >0
|
||||
* @param length number of T items to be copied from the old array to the new one
|
||||
* @return the allocated pointer, or NULL if the allocation failed
|
||||
*/
|
||||
inline H *resize(int32_t newCapacity, int32_t length=0);
|
||||
/**
|
||||
* Gives up ownership of the memory if owned, or else clones it,
|
||||
* copying the header and length T array items; resets itself to the internal memory.
|
||||
* Returns NULL if the allocation failed.
|
||||
* @param length number of T items to copy when cloning,
|
||||
* and array capacity of the clone when cloning
|
||||
* @param resultCapacity will be set to the returned array's capacity (output-only)
|
||||
* @return the header pointer;
|
||||
* caller becomes responsible for deleting the array
|
||||
*/
|
||||
inline H *orphanOrClone(int32_t length, int32_t &resultCapacity);
|
||||
private:
|
||||
H *ptr;
|
||||
int32_t capacity;
|
||||
UBool needToRelease;
|
||||
// stackHeader must precede stackArray immediately.
|
||||
H stackHeader;
|
||||
T stackArray[stackCapacity];
|
||||
void releaseMemory() {
|
||||
if(needToRelease) {
|
||||
uprv_free(ptr);
|
||||
}
|
||||
}
|
||||
/* No comparison operators with other MaybeStackHeaderAndArray's. */
|
||||
bool operator==(const MaybeStackHeaderAndArray & /*other*/) {return FALSE;}
|
||||
bool operator!=(const MaybeStackHeaderAndArray & /*other*/) {return TRUE;}
|
||||
/* No ownership transfer: No copy constructor, no assignment operator. */
|
||||
MaybeStackHeaderAndArray(const MaybeStackHeaderAndArray & /*other*/) {}
|
||||
void operator=(const MaybeStackHeaderAndArray & /*other*/) {}
|
||||
|
||||
// No heap allocation. Use only on the stack.
|
||||
// (Declaring these functions private triggers a cascade of problems;
|
||||
// see the MaybeStackArray class for details.)
|
||||
// static void * U_EXPORT2 operator new(size_t size);
|
||||
// static void * U_EXPORT2 operator new[](size_t size);
|
||||
#if U_HAVE_PLACEMENT_NEW
|
||||
// static void * U_EXPORT2 operator new(size_t, void *ptr);
|
||||
#endif
|
||||
};
|
||||
|
||||
template<typename H, typename T, int32_t stackCapacity>
|
||||
inline H *MaybeStackHeaderAndArray<H, T, stackCapacity>::resize(int32_t newCapacity,
|
||||
int32_t length) {
|
||||
if(newCapacity>=0) {
|
||||
#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
|
||||
::fprintf(::stderr,"MaybeStackHeaderAndArray alloc %d + %d * %ul\n", sizeof(H),newCapacity,sizeof(T));
|
||||
#endif
|
||||
H *p=(H *)uprv_malloc(sizeof(H)+newCapacity*sizeof(T));
|
||||
if(p!=NULL) {
|
||||
if(length<0) {
|
||||
length=0;
|
||||
} else if(length>0) {
|
||||
if(length>capacity) {
|
||||
length=capacity;
|
||||
}
|
||||
if(length>newCapacity) {
|
||||
length=newCapacity;
|
||||
}
|
||||
}
|
||||
uprv_memcpy(p, ptr, sizeof(H)+length*sizeof(T));
|
||||
releaseMemory();
|
||||
ptr=p;
|
||||
capacity=newCapacity;
|
||||
needToRelease=TRUE;
|
||||
}
|
||||
return p;
|
||||
} else {
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
template<typename H, typename T, int32_t stackCapacity>
|
||||
inline H *MaybeStackHeaderAndArray<H, T, stackCapacity>::orphanOrClone(int32_t length,
|
||||
int32_t &resultCapacity) {
|
||||
H *p;
|
||||
if(needToRelease) {
|
||||
p=ptr;
|
||||
} else {
|
||||
if(length<0) {
|
||||
length=0;
|
||||
} else if(length>capacity) {
|
||||
length=capacity;
|
||||
}
|
||||
#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
|
||||
::fprintf(::stderr,"MaybeStackHeaderAndArray (orphan) alloc %ul + %d * %lu\n", sizeof(H),length,sizeof(T));
|
||||
#endif
|
||||
p=(H *)uprv_malloc(sizeof(H)+length*sizeof(T));
|
||||
if(p==NULL) {
|
||||
return NULL;
|
||||
}
|
||||
uprv_memcpy(p, ptr, sizeof(H)+length*sizeof(T));
|
||||
}
|
||||
resultCapacity=length;
|
||||
ptr=&stackHeader;
|
||||
capacity=stackCapacity;
|
||||
needToRelease=FALSE;
|
||||
return p;
|
||||
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* __cplusplus */
|
||||
#endif /* CMEMORY_H */
|
||||
95
UnicodeConverter/icubuilds-mac/icu/icu/common/cpputils.h
Normal file
95
UnicodeConverter/icubuilds-mac/icu/icu/common/cpputils.h
Normal file
@ -0,0 +1,95 @@
|
||||
/*
|
||||
******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1997-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
******************************************************************************
|
||||
* file name: cpputils.h
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*/
|
||||
|
||||
#ifndef CPPUTILS_H
|
||||
#define CPPUTILS_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/unistr.h"
|
||||
#include "cmemory.h"
|
||||
|
||||
/*==========================================================================*/
|
||||
/* Array copy utility functions */
|
||||
/*==========================================================================*/
|
||||
|
||||
static
|
||||
inline void uprv_arrayCopy(const double* src, double* dst, int32_t count)
|
||||
{ uprv_memcpy(dst, src, (size_t)(count * sizeof(*src))); }
|
||||
|
||||
static
|
||||
inline void uprv_arrayCopy(const double* src, int32_t srcStart,
|
||||
double* dst, int32_t dstStart, int32_t count)
|
||||
{ uprv_memcpy(dst+dstStart, src+srcStart, (size_t)(count * sizeof(*src))); }
|
||||
|
||||
static
|
||||
inline void uprv_arrayCopy(const int8_t* src, int8_t* dst, int32_t count)
|
||||
{ uprv_memcpy(dst, src, (size_t)(count * sizeof(*src))); }
|
||||
|
||||
static
|
||||
inline void uprv_arrayCopy(const int8_t* src, int32_t srcStart,
|
||||
int8_t* dst, int32_t dstStart, int32_t count)
|
||||
{ uprv_memcpy(dst+dstStart, src+srcStart, (size_t)(count * sizeof(*src))); }
|
||||
|
||||
static
|
||||
inline void uprv_arrayCopy(const int16_t* src, int16_t* dst, int32_t count)
|
||||
{ uprv_memcpy(dst, src, (size_t)(count * sizeof(*src))); }
|
||||
|
||||
static
|
||||
inline void uprv_arrayCopy(const int16_t* src, int32_t srcStart,
|
||||
int16_t* dst, int32_t dstStart, int32_t count)
|
||||
{ uprv_memcpy(dst+dstStart, src+srcStart, (size_t)(count * sizeof(*src))); }
|
||||
|
||||
static
|
||||
inline void uprv_arrayCopy(const int32_t* src, int32_t* dst, int32_t count)
|
||||
{ uprv_memcpy(dst, src, (size_t)(count * sizeof(*src))); }
|
||||
|
||||
static
|
||||
inline void uprv_arrayCopy(const int32_t* src, int32_t srcStart,
|
||||
int32_t* dst, int32_t dstStart, int32_t count)
|
||||
{ uprv_memcpy(dst+dstStart, src+srcStart, (size_t)(count * sizeof(*src))); }
|
||||
|
||||
static
|
||||
inline void
|
||||
uprv_arrayCopy(const UChar *src, int32_t srcStart,
|
||||
UChar *dst, int32_t dstStart, int32_t count)
|
||||
{ uprv_memcpy(dst+dstStart, src+srcStart, (size_t)(count * sizeof(*src))); }
|
||||
|
||||
/**
|
||||
* Copy an array of UnicodeString OBJECTS (not pointers).
|
||||
* @internal
|
||||
*/
|
||||
static inline void
|
||||
uprv_arrayCopy(const icu::UnicodeString *src, icu::UnicodeString *dst, int32_t count)
|
||||
{ while(count-- > 0) *dst++ = *src++; }
|
||||
|
||||
/**
|
||||
* Copy an array of UnicodeString OBJECTS (not pointers).
|
||||
* @internal
|
||||
*/
|
||||
static inline void
|
||||
uprv_arrayCopy(const icu::UnicodeString *src, int32_t srcStart,
|
||||
icu::UnicodeString *dst, int32_t dstStart, int32_t count)
|
||||
{ uprv_arrayCopy(src+srcStart, dst+dstStart, count); }
|
||||
|
||||
/**
|
||||
* Checks that the string is readable and writable.
|
||||
* Sets U_ILLEGAL_ARGUMENT_ERROR if the string isBogus() or has an open getBuffer().
|
||||
*/
|
||||
inline void
|
||||
uprv_checkCanGetBuffer(const icu::UnicodeString &s, UErrorCode &errorCode) {
|
||||
if(U_SUCCESS(errorCode) && s.isBogus()) {
|
||||
errorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* _CPPUTILS */
|
||||
339
UnicodeConverter/icubuilds-mac/icu/icu/common/cstring.c
Normal file
339
UnicodeConverter/icubuilds-mac/icu/icu/common/cstring.c
Normal file
@ -0,0 +1,339 @@
|
||||
/*
|
||||
******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1997-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
******************************************************************************
|
||||
*
|
||||
* File CSTRING.C
|
||||
*
|
||||
* @author Helena Shih
|
||||
*
|
||||
* Modification History:
|
||||
*
|
||||
* Date Name Description
|
||||
* 6/18/98 hshih Created
|
||||
* 09/08/98 stephen Added include for ctype, for Mac Port
|
||||
* 11/15/99 helena Integrated S/390 IEEE changes.
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include "unicode/utypes.h"
|
||||
#include "cmemory.h"
|
||||
#include "cstring.h"
|
||||
#include "uassert.h"
|
||||
|
||||
/*
|
||||
* We hardcode case conversion for invariant characters to match our expectation
|
||||
* and the compiler execution charset.
|
||||
* This prevents problems on systems
|
||||
* - with non-default casing behavior, like Turkish system locales where
|
||||
* tolower('I') maps to dotless i and toupper('i') maps to dotted I
|
||||
* - where there are no lowercase Latin characters at all, or using different
|
||||
* codes (some old EBCDIC codepages)
|
||||
*
|
||||
* This works because the compiler usually runs on a platform where the execution
|
||||
* charset includes all of the invariant characters at their expected
|
||||
* code positions, so that the char * string literals in ICU code match
|
||||
* the char literals here.
|
||||
*
|
||||
* Note that the set of lowercase Latin letters is discontiguous in EBCDIC
|
||||
* and the set of uppercase Latin letters is discontiguous as well.
|
||||
*/
|
||||
|
||||
U_CAPI UBool U_EXPORT2
|
||||
uprv_isASCIILetter(char c) {
|
||||
#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
|
||||
return
|
||||
('a'<=c && c<='i') || ('j'<=c && c<='r') || ('s'<=c && c<='z') ||
|
||||
('A'<=c && c<='I') || ('J'<=c && c<='R') || ('S'<=c && c<='Z');
|
||||
#else
|
||||
return ('a'<=c && c<='z') || ('A'<=c && c<='Z');
|
||||
#endif
|
||||
}
|
||||
|
||||
U_CAPI char U_EXPORT2
|
||||
uprv_toupper(char c) {
|
||||
#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
|
||||
if(('a'<=c && c<='i') || ('j'<=c && c<='r') || ('s'<=c && c<='z')) {
|
||||
c=(char)(c+('A'-'a'));
|
||||
}
|
||||
#else
|
||||
if('a'<=c && c<='z') {
|
||||
c=(char)(c+('A'-'a'));
|
||||
}
|
||||
#endif
|
||||
return c;
|
||||
}
|
||||
|
||||
|
||||
#if 0
|
||||
/*
|
||||
* Commented out because cstring.h defines uprv_tolower() to be
|
||||
* the same as either uprv_asciitolower() or uprv_ebcdictolower()
|
||||
* to reduce the amount of code to cover with tests.
|
||||
*
|
||||
* Note that this uprv_tolower() definition is likely to work for most
|
||||
* charset families, not just ASCII and EBCDIC, because its #else branch
|
||||
* is written generically.
|
||||
*/
|
||||
U_CAPI char U_EXPORT2
|
||||
uprv_tolower(char c) {
|
||||
#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
|
||||
if(('A'<=c && c<='I') || ('J'<=c && c<='R') || ('S'<=c && c<='Z')) {
|
||||
c=(char)(c+('a'-'A'));
|
||||
}
|
||||
#else
|
||||
if('A'<=c && c<='Z') {
|
||||
c=(char)(c+('a'-'A'));
|
||||
}
|
||||
#endif
|
||||
return c;
|
||||
}
|
||||
#endif
|
||||
|
||||
U_CAPI char U_EXPORT2
|
||||
uprv_asciitolower(char c) {
|
||||
if(0x41<=c && c<=0x5a) {
|
||||
c=(char)(c+0x20);
|
||||
}
|
||||
return c;
|
||||
}
|
||||
|
||||
U_CAPI char U_EXPORT2
|
||||
uprv_ebcdictolower(char c) {
|
||||
if( (0xc1<=(uint8_t)c && (uint8_t)c<=0xc9) ||
|
||||
(0xd1<=(uint8_t)c && (uint8_t)c<=0xd9) ||
|
||||
(0xe2<=(uint8_t)c && (uint8_t)c<=0xe9)
|
||||
) {
|
||||
c=(char)(c-0x40);
|
||||
}
|
||||
return c;
|
||||
}
|
||||
|
||||
|
||||
U_CAPI char* U_EXPORT2
|
||||
T_CString_toLowerCase(char* str)
|
||||
{
|
||||
char* origPtr = str;
|
||||
|
||||
if (str) {
|
||||
do
|
||||
*str = (char)uprv_tolower(*str);
|
||||
while (*(str++));
|
||||
}
|
||||
|
||||
return origPtr;
|
||||
}
|
||||
|
||||
U_CAPI char* U_EXPORT2
|
||||
T_CString_toUpperCase(char* str)
|
||||
{
|
||||
char* origPtr = str;
|
||||
|
||||
if (str) {
|
||||
do
|
||||
*str = (char)uprv_toupper(*str);
|
||||
while (*(str++));
|
||||
}
|
||||
|
||||
return origPtr;
|
||||
}
|
||||
|
||||
/*
|
||||
* Takes a int32_t and fills in a char* string with that number "radix"-based.
|
||||
* Does not handle negative values (makes an empty string for them).
|
||||
* Writes at most 12 chars ("-2147483647" plus NUL).
|
||||
* Returns the length of the string (not including the NUL).
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
T_CString_integerToString(char* buffer, int32_t v, int32_t radix)
|
||||
{
|
||||
char tbuf[30];
|
||||
int32_t tbx = sizeof(tbuf);
|
||||
uint8_t digit;
|
||||
int32_t length = 0;
|
||||
uint32_t uval;
|
||||
|
||||
U_ASSERT(radix>=2 && radix<=16);
|
||||
uval = (uint32_t) v;
|
||||
if(v<0 && radix == 10) {
|
||||
/* Only in base 10 do we conside numbers to be signed. */
|
||||
uval = (uint32_t)(-v);
|
||||
buffer[length++] = '-';
|
||||
}
|
||||
|
||||
tbx = sizeof(tbuf)-1;
|
||||
tbuf[tbx] = 0; /* We are generating the digits backwards. Null term the end. */
|
||||
do {
|
||||
digit = (uint8_t)(uval % radix);
|
||||
tbuf[--tbx] = (char)(T_CString_itosOffset(digit));
|
||||
uval = uval / radix;
|
||||
} while (uval != 0);
|
||||
|
||||
/* copy converted number into user buffer */
|
||||
uprv_strcpy(buffer+length, tbuf+tbx);
|
||||
length += sizeof(tbuf) - tbx -1;
|
||||
return length;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* Takes a int64_t and fills in a char* string with that number "radix"-based.
|
||||
* Writes at most 21: chars ("-9223372036854775807" plus NUL).
|
||||
* Returns the length of the string, not including the terminating NULL.
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
T_CString_int64ToString(char* buffer, int64_t v, uint32_t radix)
|
||||
{
|
||||
char tbuf[30];
|
||||
int32_t tbx = sizeof(tbuf);
|
||||
uint8_t digit;
|
||||
int32_t length = 0;
|
||||
uint64_t uval;
|
||||
|
||||
U_ASSERT(radix>=2 && radix<=16);
|
||||
uval = (uint64_t) v;
|
||||
if(v<0 && radix == 10) {
|
||||
/* Only in base 10 do we conside numbers to be signed. */
|
||||
uval = (uint64_t)(-v);
|
||||
buffer[length++] = '-';
|
||||
}
|
||||
|
||||
tbx = sizeof(tbuf)-1;
|
||||
tbuf[tbx] = 0; /* We are generating the digits backwards. Null term the end. */
|
||||
do {
|
||||
digit = (uint8_t)(uval % radix);
|
||||
tbuf[--tbx] = (char)(T_CString_itosOffset(digit));
|
||||
uval = uval / radix;
|
||||
} while (uval != 0);
|
||||
|
||||
/* copy converted number into user buffer */
|
||||
uprv_strcpy(buffer+length, tbuf+tbx);
|
||||
length += sizeof(tbuf) - tbx -1;
|
||||
return length;
|
||||
}
|
||||
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
T_CString_stringToInteger(const char *integerString, int32_t radix)
|
||||
{
|
||||
char *end;
|
||||
return uprv_strtoul(integerString, &end, radix);
|
||||
|
||||
}
|
||||
|
||||
U_CAPI int U_EXPORT2
|
||||
uprv_stricmp(const char *str1, const char *str2) {
|
||||
if(str1==NULL) {
|
||||
if(str2==NULL) {
|
||||
return 0;
|
||||
} else {
|
||||
return -1;
|
||||
}
|
||||
} else if(str2==NULL) {
|
||||
return 1;
|
||||
} else {
|
||||
/* compare non-NULL strings lexically with lowercase */
|
||||
int rc;
|
||||
unsigned char c1, c2;
|
||||
|
||||
for(;;) {
|
||||
c1=(unsigned char)*str1;
|
||||
c2=(unsigned char)*str2;
|
||||
if(c1==0) {
|
||||
if(c2==0) {
|
||||
return 0;
|
||||
} else {
|
||||
return -1;
|
||||
}
|
||||
} else if(c2==0) {
|
||||
return 1;
|
||||
} else {
|
||||
/* compare non-zero characters with lowercase */
|
||||
rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2);
|
||||
if(rc!=0) {
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
++str1;
|
||||
++str2;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
U_CAPI int U_EXPORT2
|
||||
uprv_strnicmp(const char *str1, const char *str2, uint32_t n) {
|
||||
if(str1==NULL) {
|
||||
if(str2==NULL) {
|
||||
return 0;
|
||||
} else {
|
||||
return -1;
|
||||
}
|
||||
} else if(str2==NULL) {
|
||||
return 1;
|
||||
} else {
|
||||
/* compare non-NULL strings lexically with lowercase */
|
||||
int rc;
|
||||
unsigned char c1, c2;
|
||||
|
||||
for(; n--;) {
|
||||
c1=(unsigned char)*str1;
|
||||
c2=(unsigned char)*str2;
|
||||
if(c1==0) {
|
||||
if(c2==0) {
|
||||
return 0;
|
||||
} else {
|
||||
return -1;
|
||||
}
|
||||
} else if(c2==0) {
|
||||
return 1;
|
||||
} else {
|
||||
/* compare non-zero characters with lowercase */
|
||||
rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2);
|
||||
if(rc!=0) {
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
++str1;
|
||||
++str2;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
U_CAPI char* U_EXPORT2
|
||||
uprv_strdup(const char *src) {
|
||||
size_t len = uprv_strlen(src) + 1;
|
||||
char *dup = (char *) uprv_malloc(len);
|
||||
|
||||
if (dup) {
|
||||
uprv_memcpy(dup, src, len);
|
||||
}
|
||||
|
||||
return dup;
|
||||
}
|
||||
|
||||
U_CAPI char* U_EXPORT2
|
||||
uprv_strndup(const char *src, int32_t n) {
|
||||
char *dup;
|
||||
|
||||
if(n < 0) {
|
||||
dup = uprv_strdup(src);
|
||||
} else {
|
||||
dup = (char*)uprv_malloc(n+1);
|
||||
if (dup) {
|
||||
uprv_memcpy(dup, src, n);
|
||||
dup[n] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
return dup;
|
||||
}
|
||||
140
UnicodeConverter/icubuilds-mac/icu/icu/common/cstring.h
Normal file
140
UnicodeConverter/icubuilds-mac/icu/icu/common/cstring.h
Normal file
@ -0,0 +1,140 @@
|
||||
/*
|
||||
******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1997-2012, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
******************************************************************************
|
||||
*
|
||||
* File CSTRING.H
|
||||
*
|
||||
* Contains CString interface
|
||||
*
|
||||
* @author Helena Shih
|
||||
*
|
||||
* Modification History:
|
||||
*
|
||||
* Date Name Description
|
||||
* 6/17/98 hshih Created.
|
||||
* 05/03/99 stephen Changed from functions to macros.
|
||||
* 06/14/99 stephen Added icu_strncat, icu_strncmp, icu_tolower
|
||||
*
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef CSTRING_H
|
||||
#define CSTRING_H 1
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "cmemory.h"
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <ctype.h>
|
||||
|
||||
#define uprv_strcpy(dst, src) U_STANDARD_CPP_NAMESPACE strcpy(dst, src)
|
||||
#define uprv_strlen(str) U_STANDARD_CPP_NAMESPACE strlen(str)
|
||||
#define uprv_strcmp(s1, s2) U_STANDARD_CPP_NAMESPACE strcmp(s1, s2)
|
||||
#define uprv_strcat(dst, src) U_STANDARD_CPP_NAMESPACE strcat(dst, src)
|
||||
#define uprv_strchr(s, c) U_STANDARD_CPP_NAMESPACE strchr(s, c)
|
||||
#define uprv_strstr(s, c) U_STANDARD_CPP_NAMESPACE strstr(s, c)
|
||||
#define uprv_strrchr(s, c) U_STANDARD_CPP_NAMESPACE strrchr(s, c)
|
||||
|
||||
#if U_DEBUG
|
||||
|
||||
#define uprv_strncpy(dst, src, size) ( \
|
||||
uprv_checkValidMemory(src, 1), \
|
||||
U_STANDARD_CPP_NAMESPACE strncpy(dst, src, size))
|
||||
#define uprv_strncmp(s1, s2, n) ( \
|
||||
uprv_checkValidMemory(s1, 1), \
|
||||
uprv_checkValidMemory(s2, 1), \
|
||||
U_STANDARD_CPP_NAMESPACE strncmp(s1, s2, n))
|
||||
#define uprv_strncat(dst, src, n) ( \
|
||||
uprv_checkValidMemory(src, 1), \
|
||||
U_STANDARD_CPP_NAMESPACE strncat(dst, src, n))
|
||||
|
||||
#else
|
||||
|
||||
#define uprv_strncpy(dst, src, size) U_STANDARD_CPP_NAMESPACE strncpy(dst, src, size)
|
||||
#define uprv_strncmp(s1, s2, n) U_STANDARD_CPP_NAMESPACE strncmp(s1, s2, n)
|
||||
#define uprv_strncat(dst, src, n) U_STANDARD_CPP_NAMESPACE strncat(dst, src, n)
|
||||
|
||||
#endif /* U_DEBUG */
|
||||
|
||||
/**
|
||||
* Is c an ASCII-repertoire letter a-z or A-Z?
|
||||
* Note: The implementation is specific to whether ICU is compiled for
|
||||
* an ASCII-based or EBCDIC-based machine. There just does not seem to be a better name for this.
|
||||
*/
|
||||
U_CAPI UBool U_EXPORT2
|
||||
uprv_isASCIILetter(char c);
|
||||
|
||||
U_CAPI char U_EXPORT2
|
||||
uprv_toupper(char c);
|
||||
|
||||
|
||||
U_CAPI char U_EXPORT2
|
||||
uprv_asciitolower(char c);
|
||||
|
||||
U_CAPI char U_EXPORT2
|
||||
uprv_ebcdictolower(char c);
|
||||
|
||||
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
|
||||
# define uprv_tolower uprv_asciitolower
|
||||
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
|
||||
# define uprv_tolower uprv_ebcdictolower
|
||||
#else
|
||||
# error U_CHARSET_FAMILY is not valid
|
||||
#endif
|
||||
|
||||
#define uprv_strtod(source, end) U_STANDARD_CPP_NAMESPACE strtod(source, end)
|
||||
#define uprv_strtoul(str, end, base) U_STANDARD_CPP_NAMESPACE strtoul(str, end, base)
|
||||
#define uprv_strtol(str, end, base) U_STANDARD_CPP_NAMESPACE strtol(str, end, base)
|
||||
|
||||
/* Conversion from a digit to the character with radix base from 2-19 */
|
||||
/* May need to use U_UPPER_ORDINAL*/
|
||||
#define T_CString_itosOffset(a) ((a)<=9?('0'+(a)):('A'+(a)-10))
|
||||
|
||||
U_CAPI char* U_EXPORT2
|
||||
uprv_strdup(const char *src);
|
||||
|
||||
/**
|
||||
* uprv_malloc n+1 bytes, and copy n bytes from src into the new string.
|
||||
* Terminate with a null at offset n. If n is -1, works like uprv_strdup
|
||||
* @param src the source string to copy from
|
||||
* @param n length of the input string, not including null.
|
||||
* @return new string (owned by caller, use uprv_free to free).
|
||||
* @internal
|
||||
*/
|
||||
U_CAPI char* U_EXPORT2
|
||||
uprv_strndup(const char *src, int32_t n);
|
||||
|
||||
U_CAPI char* U_EXPORT2
|
||||
T_CString_toLowerCase(char* str);
|
||||
|
||||
U_CAPI char* U_EXPORT2
|
||||
T_CString_toUpperCase(char* str);
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
T_CString_integerToString(char *buffer, int32_t n, int32_t radix);
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
T_CString_int64ToString(char *buffer, int64_t n, uint32_t radix);
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
T_CString_stringToInteger(const char *integerString, int32_t radix);
|
||||
|
||||
/**
|
||||
* Case-insensitive, language-independent string comparison
|
||||
* limited to the ASCII character repertoire.
|
||||
*/
|
||||
U_CAPI int U_EXPORT2
|
||||
uprv_stricmp(const char *str1, const char *str2);
|
||||
|
||||
/**
|
||||
* Case-insensitive, language-independent string comparison
|
||||
* limited to the ASCII character repertoire.
|
||||
*/
|
||||
U_CAPI int U_EXPORT2
|
||||
uprv_strnicmp(const char *str1, const char *str2, uint32_t n);
|
||||
|
||||
#endif /* ! CSTRING_H */
|
||||
53
UnicodeConverter/icubuilds-mac/icu/icu/common/cwchar.c
Normal file
53
UnicodeConverter/icubuilds-mac/icu/icu/common/cwchar.c
Normal file
@ -0,0 +1,53 @@
|
||||
/*
|
||||
******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2001, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
******************************************************************************
|
||||
* file name: cwchar.c
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2001may25
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !U_HAVE_WCSCPY
|
||||
|
||||
#include "cwchar.h"
|
||||
|
||||
/*
 * Appends the NUL-terminated wide string src to the end of the
 * NUL-terminated wide string dst and returns dst.
 * The terminating NUL of src is copied as well.
 */
U_CAPI wchar_t *uprv_wcscat(wchar_t *dst, const wchar_t *src) {
    wchar_t *tail = dst;

    /* Advance to the terminator of the destination string. */
    for (; *tail != 0; ++tail) {
    }

    /* Copy src over that terminator, stopping after its NUL is written. */
    for (;;) {
        const wchar_t ch = *src;
        *tail = ch;
        if (ch == 0) {
            break;
        }
        ++tail;
        ++src;
    }
    return dst;
}
|
||||
|
||||
/*
 * Copies the NUL-terminated wide string src, including its terminating
 * NUL, into dst and returns dst.
 */
U_CAPI wchar_t *uprv_wcscpy(wchar_t *dst, const wchar_t *src) {
    size_t i = 0;

    for (;; ++i) {
        const wchar_t ch = src[i];
        dst[i] = ch;
        if (ch == 0) {
            /* The terminator has been written; nothing more to copy. */
            break;
        }
    }
    return dst;
}
|
||||
|
||||
/*
 * Returns the number of wide characters in src that precede its
 * terminating NUL.
 */
U_CAPI size_t uprv_wcslen(const wchar_t *src) {
    size_t count = 0;

    while (src[count] != 0) {
        ++count;
    }
    return count;
}
|
||||
|
||||
#endif
|
||||
|
||||
56
UnicodeConverter/icubuilds-mac/icu/icu/common/cwchar.h
Normal file
56
UnicodeConverter/icubuilds-mac/icu/icu/common/cwchar.h
Normal file
@ -0,0 +1,56 @@
|
||||
/*
|
||||
******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2001, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
******************************************************************************
|
||||
* file name: cwchar.h
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2001may25
|
||||
* created by: Markus W. Scherer
|
||||
*
|
||||
* This file contains ICU-internal definitions of wchar_t operations.
|
||||
* These definitions were moved here from cstring.h so that fewer
|
||||
* ICU implementation files include wchar.h.
|
||||
*/
|
||||
|
||||
#ifndef __CWCHAR_H__
|
||||
#define __CWCHAR_H__
|
||||
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
/* Do this after utypes.h so that we have U_HAVE_WCHAR_H . */
|
||||
#if U_HAVE_WCHAR_H
|
||||
# include <wchar.h>
|
||||
#endif
|
||||
|
||||
/*===========================================================================*/
|
||||
/* Wide-character functions */
|
||||
/*===========================================================================*/
|
||||
|
||||
/* The following are not available on all systems, defined in wchar.h or string.h. */
|
||||
#if U_HAVE_WCSCPY
|
||||
# define uprv_wcscpy wcscpy
|
||||
# define uprv_wcscat wcscat
|
||||
# define uprv_wcslen wcslen
|
||||
#else
|
||||
U_CAPI wchar_t* U_EXPORT2
|
||||
uprv_wcscpy(wchar_t *dst, const wchar_t *src);
|
||||
U_CAPI wchar_t* U_EXPORT2
|
||||
uprv_wcscat(wchar_t *dst, const wchar_t *src);
|
||||
U_CAPI size_t U_EXPORT2
|
||||
uprv_wcslen(const wchar_t *src);
|
||||
#endif
|
||||
|
||||
/* The following are part of the ANSI C standard, defined in stdlib.h . */
|
||||
#define uprv_wcstombs(mbstr, wcstr, count) U_STANDARD_CPP_NAMESPACE wcstombs(mbstr, wcstr, count)
|
||||
#define uprv_mbstowcs(wcstr, mbstr, count) U_STANDARD_CPP_NAMESPACE mbstowcs(wcstr, mbstr, count)
|
||||
|
||||
|
||||
#endif
|
||||
1403
UnicodeConverter/icubuilds-mac/icu/icu/common/dictbe.cpp
Normal file
1403
UnicodeConverter/icubuilds-mac/icu/icu/common/dictbe.cpp
Normal file
File diff suppressed because it is too large
Load Diff
427
UnicodeConverter/icubuilds-mac/icu/icu/common/dictbe.h
Normal file
427
UnicodeConverter/icubuilds-mac/icu/icu/common/dictbe.h
Normal file
@ -0,0 +1,427 @@
|
||||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2006-2014, International Business Machines Corporation *
|
||||
* and others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef DICTBE_H
|
||||
#define DICTBE_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/uniset.h"
|
||||
#include "unicode/utext.h"
|
||||
|
||||
#include "brkeng.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
class DictionaryMatcher;
|
||||
class Normalizer2;
|
||||
|
||||
/*******************************************************************
|
||||
* DictionaryBreakEngine
|
||||
*/
|
||||
|
||||
/**
|
||||
* <p>DictionaryBreakEngine is a kind of LanguageBreakEngine that uses a
|
||||
* dictionary to determine language-specific breaks.</p>
|
||||
*
|
||||
* <p>After it is constructed a DictionaryBreakEngine may be shared between
|
||||
* threads without synchronization.</p>
|
||||
*/
|
||||
class DictionaryBreakEngine : public LanguageBreakEngine {
|
||||
private:
|
||||
/**
|
||||
* The set of characters handled by this engine
|
||||
* @internal
|
||||
*/
|
||||
|
||||
UnicodeSet fSet;
|
||||
|
||||
/**
|
||||
* The set of break types handled by this engine
|
||||
* @internal
|
||||
*/
|
||||
|
||||
uint32_t fTypes;
|
||||
|
||||
/**
|
||||
* <p>Default constructor.</p>
|
||||
*
|
||||
*/
|
||||
DictionaryBreakEngine();
|
||||
|
||||
public:
|
||||
|
||||
/**
|
||||
* <p>Constructor setting the break types handled.</p>
|
||||
*
|
||||
* @param breakTypes A bitmap of types handled by the engine.
|
||||
*/
|
||||
DictionaryBreakEngine( uint32_t breakTypes );
|
||||
|
||||
/**
|
||||
* <p>Virtual destructor.</p>
|
||||
*/
|
||||
virtual ~DictionaryBreakEngine();
|
||||
|
||||
/**
|
||||
* <p>Indicate whether this engine handles a particular character for
|
||||
* a particular kind of break.</p>
|
||||
*
|
||||
* @param c A character which begins a run that the engine might handle
|
||||
* @param breakType The type of text break which the caller wants to determine
|
||||
* @return TRUE if this engine handles the particular character and break
|
||||
* type.
|
||||
*/
|
||||
virtual UBool handles( UChar32 c, int32_t breakType ) const;
|
||||
|
||||
/**
|
||||
* <p>Find any breaks within a run in the supplied text.</p>
|
||||
*
|
||||
* @param text A UText representing the text. The iterator is left at
|
||||
* the end of the run of characters which the engine is capable of handling
|
||||
* that starts from the first (or last) character in the range.
|
||||
* @param startPos The start of the run within the supplied text.
|
||||
* @param endPos The end of the run within the supplied text.
|
||||
* @param reverse Whether the caller is looking for breaks in a reverse
|
||||
* direction.
|
||||
* @param breakType The type of break desired, or -1.
|
||||
* @param foundBreaks An allocated C array of the breaks found, if any
|
||||
* @return The number of breaks found.
|
||||
*/
|
||||
virtual int32_t findBreaks( UText *text,
|
||||
int32_t startPos,
|
||||
int32_t endPos,
|
||||
UBool reverse,
|
||||
int32_t breakType,
|
||||
UStack &foundBreaks ) const;
|
||||
|
||||
protected:
|
||||
|
||||
/**
|
||||
* <p>Set the character set handled by this engine.</p>
|
||||
*
|
||||
* @param set A UnicodeSet of the set of characters handled by the engine
|
||||
*/
|
||||
virtual void setCharacters( const UnicodeSet &set );
|
||||
|
||||
/**
|
||||
* <p>Set the break types handled by this engine.</p>
|
||||
*
|
||||
* @param breakTypes A bitmap of types handled by the engine.
|
||||
*/
|
||||
// virtual void setBreakTypes( uint32_t breakTypes );
|
||||
|
||||
/**
|
||||
* <p>Divide up a range of known dictionary characters handled by this break engine.</p>
|
||||
*
|
||||
* @param text A UText representing the text
|
||||
* @param rangeStart The start of the range of dictionary characters
|
||||
* @param rangeEnd The end of the range of dictionary characters
|
||||
* @param foundBreaks Output of C array of int32_t break positions, or 0
|
||||
* @return The number of breaks found
|
||||
*/
|
||||
virtual int32_t divideUpDictionaryRange( UText *text,
|
||||
int32_t rangeStart,
|
||||
int32_t rangeEnd,
|
||||
UStack &foundBreaks ) const = 0;
|
||||
|
||||
};
|
||||
|
||||
/*******************************************************************
|
||||
* ThaiBreakEngine
|
||||
*/
|
||||
|
||||
/**
|
||||
* <p>ThaiBreakEngine is a kind of DictionaryBreakEngine that uses a
|
||||
* dictionary and heuristics to determine Thai-specific breaks.</p>
|
||||
*
|
||||
* <p>After it is constructed a ThaiBreakEngine may be shared between
|
||||
* threads without synchronization.</p>
|
||||
*/
|
||||
class ThaiBreakEngine : public DictionaryBreakEngine {
|
||||
private:
|
||||
/**
|
||||
* The set of characters handled by this engine
|
||||
* @internal
|
||||
*/
|
||||
|
||||
UnicodeSet fThaiWordSet;
|
||||
UnicodeSet fEndWordSet;
|
||||
UnicodeSet fBeginWordSet;
|
||||
UnicodeSet fSuffixSet;
|
||||
UnicodeSet fMarkSet;
|
||||
DictionaryMatcher *fDictionary;
|
||||
|
||||
public:
|
||||
|
||||
/**
|
||||
* <p>Default constructor.</p>
|
||||
*
|
||||
* @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the
|
||||
* engine is deleted.
|
||||
*/
|
||||
ThaiBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status);
|
||||
|
||||
/**
|
||||
* <p>Virtual destructor.</p>
|
||||
*/
|
||||
virtual ~ThaiBreakEngine();
|
||||
|
||||
protected:
|
||||
/**
|
||||
* <p>Divide up a range of known dictionary characters handled by this break engine.</p>
|
||||
*
|
||||
* @param text A UText representing the text
|
||||
* @param rangeStart The start of the range of dictionary characters
|
||||
* @param rangeEnd The end of the range of dictionary characters
|
||||
* @param foundBreaks Output of C array of int32_t break positions, or 0
|
||||
* @return The number of breaks found
|
||||
*/
|
||||
virtual int32_t divideUpDictionaryRange( UText *text,
|
||||
int32_t rangeStart,
|
||||
int32_t rangeEnd,
|
||||
UStack &foundBreaks ) const;
|
||||
|
||||
};
|
||||
|
||||
/*******************************************************************
|
||||
* LaoBreakEngine
|
||||
*/
|
||||
|
||||
/**
|
||||
* <p>LaoBreakEngine is a kind of DictionaryBreakEngine that uses a
|
||||
* dictionary and heuristics to determine Lao-specific breaks.</p>
|
||||
*
|
||||
* <p>After it is constructed a LaoBreakEngine may be shared between
|
||||
* threads without synchronization.</p>
|
||||
*/
|
||||
class LaoBreakEngine : public DictionaryBreakEngine {
|
||||
private:
|
||||
/**
|
||||
* The set of characters handled by this engine
|
||||
* @internal
|
||||
*/
|
||||
|
||||
UnicodeSet fLaoWordSet;
|
||||
UnicodeSet fEndWordSet;
|
||||
UnicodeSet fBeginWordSet;
|
||||
UnicodeSet fMarkSet;
|
||||
DictionaryMatcher *fDictionary;
|
||||
|
||||
public:
|
||||
|
||||
/**
|
||||
* <p>Default constructor.</p>
|
||||
*
|
||||
* @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the
|
||||
* engine is deleted.
|
||||
*/
|
||||
LaoBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status);
|
||||
|
||||
/**
|
||||
* <p>Virtual destructor.</p>
|
||||
*/
|
||||
virtual ~LaoBreakEngine();
|
||||
|
||||
protected:
|
||||
/**
|
||||
* <p>Divide up a range of known dictionary characters handled by this break engine.</p>
|
||||
*
|
||||
* @param text A UText representing the text
|
||||
* @param rangeStart The start of the range of dictionary characters
|
||||
* @param rangeEnd The end of the range of dictionary characters
|
||||
* @param foundBreaks Output of C array of int32_t break positions, or 0
|
||||
* @return The number of breaks found
|
||||
*/
|
||||
virtual int32_t divideUpDictionaryRange( UText *text,
|
||||
int32_t rangeStart,
|
||||
int32_t rangeEnd,
|
||||
UStack &foundBreaks ) const;
|
||||
|
||||
};
|
||||
|
||||
/*******************************************************************
|
||||
* BurmeseBreakEngine
|
||||
*/
|
||||
|
||||
/**
|
||||
* <p>BurmeseBreakEngine is a kind of DictionaryBreakEngine that uses a
|
||||
* DictionaryMatcher and heuristics to determine Burmese-specific breaks.</p>
|
||||
*
|
||||
* <p>After it is constructed a BurmeseBreakEngine may be shared between
|
||||
* threads without synchronization.</p>
|
||||
*/
|
||||
class BurmeseBreakEngine : public DictionaryBreakEngine {
|
||||
private:
|
||||
/**
|
||||
* The set of characters handled by this engine
|
||||
* @internal
|
||||
*/
|
||||
|
||||
UnicodeSet fBurmeseWordSet;
|
||||
UnicodeSet fEndWordSet;
|
||||
UnicodeSet fBeginWordSet;
|
||||
UnicodeSet fMarkSet;
|
||||
DictionaryMatcher *fDictionary;
|
||||
|
||||
public:
|
||||
|
||||
/**
|
||||
* <p>Default constructor.</p>
|
||||
*
|
||||
* @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the
|
||||
* engine is deleted.
|
||||
*/
|
||||
BurmeseBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status);
|
||||
|
||||
/**
|
||||
* <p>Virtual destructor.</p>
|
||||
*/
|
||||
virtual ~BurmeseBreakEngine();
|
||||
|
||||
protected:
|
||||
/**
|
||||
* <p>Divide up a range of known dictionary characters.</p>
|
||||
*
|
||||
* @param text A UText representing the text
|
||||
* @param rangeStart The start of the range of dictionary characters
|
||||
* @param rangeEnd The end of the range of dictionary characters
|
||||
* @param foundBreaks Output of C array of int32_t break positions, or 0
|
||||
* @return The number of breaks found
|
||||
*/
|
||||
virtual int32_t divideUpDictionaryRange( UText *text,
|
||||
int32_t rangeStart,
|
||||
int32_t rangeEnd,
|
||||
UStack &foundBreaks ) const;
|
||||
|
||||
};
|
||||
|
||||
/*******************************************************************
|
||||
* KhmerBreakEngine
|
||||
*/
|
||||
|
||||
/**
|
||||
* <p>KhmerBreakEngine is a kind of DictionaryBreakEngine that uses a
|
||||
* DictionaryMatcher and heuristics to determine Khmer-specific breaks.</p>
|
||||
*
|
||||
* <p>After it is constructed a KhmerBreakEngine may be shared between
|
||||
* threads without synchronization.</p>
|
||||
*/
|
||||
class KhmerBreakEngine : public DictionaryBreakEngine {
|
||||
private:
|
||||
/**
|
||||
* The set of characters handled by this engine
|
||||
* @internal
|
||||
*/
|
||||
|
||||
UnicodeSet fKhmerWordSet;
|
||||
UnicodeSet fEndWordSet;
|
||||
UnicodeSet fBeginWordSet;
|
||||
UnicodeSet fMarkSet;
|
||||
DictionaryMatcher *fDictionary;
|
||||
|
||||
public:
|
||||
|
||||
/**
|
||||
* <p>Default constructor.</p>
|
||||
*
|
||||
* @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the
|
||||
* engine is deleted.
|
||||
*/
|
||||
KhmerBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status);
|
||||
|
||||
/**
|
||||
* <p>Virtual destructor.</p>
|
||||
*/
|
||||
virtual ~KhmerBreakEngine();
|
||||
|
||||
protected:
|
||||
/**
|
||||
* <p>Divide up a range of known dictionary characters.</p>
|
||||
*
|
||||
* @param text A UText representing the text
|
||||
* @param rangeStart The start of the range of dictionary characters
|
||||
* @param rangeEnd The end of the range of dictionary characters
|
||||
* @param foundBreaks Output of C array of int32_t break positions, or 0
|
||||
* @return The number of breaks found
|
||||
*/
|
||||
virtual int32_t divideUpDictionaryRange( UText *text,
|
||||
int32_t rangeStart,
|
||||
int32_t rangeEnd,
|
||||
UStack &foundBreaks ) const;
|
||||
|
||||
};
|
||||
|
||||
#if !UCONFIG_NO_NORMALIZATION
|
||||
|
||||
/*******************************************************************
|
||||
* CjkBreakEngine
|
||||
*/
|
||||
|
||||
// Selects the language/script that a CjkBreakEngine will handle.
enum LanguageType {
    kKorean          = 0,
    kChineseJapanese = 1
};
|
||||
|
||||
/**
|
||||
* <p>CjkBreakEngine is a kind of DictionaryBreakEngine that uses a
|
||||
* dictionary with costs associated with each word and
|
||||
* Viterbi decoding to determine CJK-specific breaks.</p>
|
||||
*/
|
||||
class CjkBreakEngine : public DictionaryBreakEngine {
|
||||
protected:
|
||||
/**
|
||||
* The set of characters handled by this engine
|
||||
* @internal
|
||||
*/
|
||||
UnicodeSet fHangulWordSet;
|
||||
UnicodeSet fHanWordSet;
|
||||
UnicodeSet fKatakanaWordSet;
|
||||
UnicodeSet fHiraganaWordSet;
|
||||
|
||||
DictionaryMatcher *fDictionary;
|
||||
const Normalizer2 *nfkcNorm2;
|
||||
|
||||
public:
|
||||
|
||||
/**
|
||||
* <p>Default constructor.</p>
|
||||
*
|
||||
* @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the
|
||||
* engine is deleted. The DictionaryMatcher must contain costs for each word
|
||||
* in order for the dictionary to work properly.
|
||||
*/
|
||||
CjkBreakEngine(DictionaryMatcher *adoptDictionary, LanguageType type, UErrorCode &status);
|
||||
|
||||
/**
|
||||
* <p>Virtual destructor.</p>
|
||||
*/
|
||||
virtual ~CjkBreakEngine();
|
||||
|
||||
protected:
|
||||
/**
|
||||
* <p>Divide up a range of known dictionary characters handled by this break engine.</p>
|
||||
*
|
||||
* @param text A UText representing the text
|
||||
* @param rangeStart The start of the range of dictionary characters
|
||||
* @param rangeEnd The end of the range of dictionary characters
|
||||
* @param foundBreaks Output of C array of int32_t break positions, or 0
|
||||
* @return The number of breaks found
|
||||
*/
|
||||
virtual int32_t divideUpDictionaryRange( UText *text,
|
||||
int32_t rangeStart,
|
||||
int32_t rangeEnd,
|
||||
UStack &foundBreaks ) const;
|
||||
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
/* DICTBE_H */
|
||||
#endif
|
||||
240
UnicodeConverter/icubuilds-mac/icu/icu/common/dictionarydata.cpp
Normal file
240
UnicodeConverter/icubuilds-mac/icu/icu/common/dictionarydata.cpp
Normal file
@ -0,0 +1,240 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2014, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
* dictionarydata.cpp
|
||||
*
|
||||
* created on: 2012may31
|
||||
* created by: Markus W. Scherer & Maxime Serrano
|
||||
*/
|
||||
|
||||
#include "dictionarydata.h"
|
||||
#include "unicode/ucharstrie.h"
|
||||
#include "unicode/bytestrie.h"
|
||||
#include "unicode/udata.h"
|
||||
#include "cmemory.h"
|
||||
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
// Trie type values and flags; the type is extracted with TRIE_TYPE_MASK.
const int32_t DictionaryData::TRIE_TYPE_BYTES       = 0;
const int32_t DictionaryData::TRIE_TYPE_UCHARS      = 1;
const int32_t DictionaryData::TRIE_TYPE_MASK        = 7;
const int32_t DictionaryData::TRIE_HAS_VALUES       = 8;

// Transform specification: a type field and an offset field, each with its mask.
const int32_t DictionaryData::TRANSFORM_NONE        = 0;
const int32_t DictionaryData::TRANSFORM_TYPE_OFFSET = 0x1000000;
const int32_t DictionaryData::TRANSFORM_TYPE_MASK   = 0x7f000000;
const int32_t DictionaryData::TRANSFORM_OFFSET_MASK = 0x1fffff;
|
||||
|
||||
// Out-of-line definition of the virtual destructor; there is nothing to release.
DictionaryMatcher::~DictionaryMatcher() {}
|
||||
|
||||
// Closes the UDataMemory held in `file`.
UCharsDictionaryMatcher::~UCharsDictionaryMatcher()
{
    udata_close(file);
}
|
||||
|
||||
int32_t UCharsDictionaryMatcher::getType() const {
|
||||
return DictionaryData::TRIE_TYPE_UCHARS;
|
||||
}
|
||||
|
||||
/**
 * Finds dictionary words that begin at the current position of text by
 * walking a UCharsTrie one code point at a time.
 *
 * @param text      Text to match; matching begins at its current position
 *                  and the UText is advanced as code points are read.
 * @param maxLength Matching stops once the consumed length, in native index
 *                  units of the UText, reaches this value.
 * @param limit     Capacity of the output arrays; at most this many words
 *                  are recorded.
 * @param lengths   If not NULL, receives each word's length in native index units.
 * @param cpLengths If not NULL, receives each word's length in code points.
 * @param values    If not NULL, receives the trie value of each word.
 * @param prefix    If not NULL, receives the number of code points fed to the
 *                  trie, including a last one that produced no match.
 * @return Number of words recorded in the output arrays.
 */
int32_t UCharsDictionaryMatcher::matches(UText *text, int32_t maxLength, int32_t limit,
                                         int32_t *lengths, int32_t *cpLengths, int32_t *values,
                                         int32_t *prefix) const {
    UCharsTrie trie(characters);
    const int32_t startIndex = (int32_t)utext_getNativeIndex(text);
    int32_t found = 0;
    int32_t cpConsumed = 0;

    for (;;) {
        const UChar32 c = utext_next32(text);
        if (c < 0) {
            // End of text.
            break;
        }

        // The first code point restarts the trie; later ones continue from it.
        const UStringTrieResult step = (cpConsumed == 0) ? trie.first(c) : trie.next(c);
        const int32_t nativeLength = (int32_t)(utext_getNativeIndex(text) - startIndex);
        ++cpConsumed;

        if (USTRINGTRIE_HAS_VALUE(step)) {
            // A complete word ends here; record it if there is room left.
            if (found < limit) {
                if (values != NULL) {
                    values[found] = trie.getValue();
                }
                if (lengths != NULL) {
                    lengths[found] = nativeLength;
                }
                if (cpLengths != NULL) {
                    cpLengths[found] = cpConsumed;
                }
                ++found;
            }
            if (step == USTRINGTRIE_FINAL_VALUE) {
                // No longer word can follow.
                break;
            }
        } else if (step == USTRINGTRIE_NO_MATCH) {
            break;
        }

        if (nativeLength >= maxLength) {
            break;
        }
    }

    if (prefix != NULL) {
        *prefix = cpConsumed;
    }
    return found;
}
|
||||
|
||||
// Closes the UDataMemory held in `file`.
BytesDictionaryMatcher::~BytesDictionaryMatcher()
{
    udata_close(file);
}
|
||||
|
||||
/**
 * Maps a code point to the value that is fed to the bytes trie.
 *
 * When the transform type in transformConstant is TRANSFORM_TYPE_OFFSET:
 * U+200D maps to 0xFF, U+200C maps to 0xFE, and any other code point is
 * reduced by the offset stored in transformConstant; a result outside
 * 0..0xFD yields U_SENTINEL. For any other transform type the code point
 * is returned unchanged.
 */
UChar32 BytesDictionaryMatcher::transform(UChar32 c) const {
    const int32_t transformType = transformConstant & DictionaryData::TRANSFORM_TYPE_MASK;
    if (transformType != DictionaryData::TRANSFORM_TYPE_OFFSET) {
        return c;
    }

    switch (c) {
    case 0x200D:
        return 0xFF;
    case 0x200C:
        return 0xFE;
    default:
        break;
    }

    const int32_t delta = c - (transformConstant & DictionaryData::TRANSFORM_OFFSET_MASK);
    if (0 <= delta && delta <= 0xFD) {
        return (UChar32)delta;
    }
    return U_SENTINEL;
}
|
||||
|
||||
int32_t BytesDictionaryMatcher::getType() const {
|
||||
return DictionaryData::TRIE_TYPE_BYTES;
|
||||
}
|
||||
|
||||
/**
 * Finds dictionary words that begin at the current position of text by
 * walking a BytesTrie; each code point is passed through transform()
 * before it is fed to the trie.
 *
 * @param text      Text to match; matching begins at its current position
 *                  and the UText is advanced as code points are read.
 * @param maxLength Matching stops once the consumed length, in native index
 *                  units of the UText, reaches this value.
 * @param limit     Capacity of the output arrays; at most this many words
 *                  are recorded.
 * @param lengths   If not NULL, receives each word's length in native index units.
 * @param cpLengths If not NULL, receives each word's length in code points.
 * @param values    If not NULL, receives the trie value of each word.
 * @param prefix    If not NULL, receives the number of code points fed to the
 *                  trie, including a last one that produced no match.
 * @return Number of words recorded in the output arrays.
 */
int32_t BytesDictionaryMatcher::matches(UText *text, int32_t maxLength, int32_t limit,
                                        int32_t *lengths, int32_t *cpLengths, int32_t *values,
                                        int32_t *prefix) const {
    BytesTrie trie(characters);
    const int32_t startIndex = (int32_t)utext_getNativeIndex(text);
    int32_t found = 0;
    int32_t cpConsumed = 0;

    for (;;) {
        const UChar32 c = utext_next32(text);
        if (c < 0) {
            // End of text.
            break;
        }

        // The first code point restarts the trie; later ones continue from it.
        const UStringTrieResult step =
            (cpConsumed == 0) ? trie.first(transform(c)) : trie.next(transform(c));
        const int32_t nativeLength = (int32_t)(utext_getNativeIndex(text) - startIndex);
        ++cpConsumed;

        if (USTRINGTRIE_HAS_VALUE(step)) {
            // A complete word ends here; record it if there is room left.
            if (found < limit) {
                if (values != NULL) {
                    values[found] = trie.getValue();
                }
                if (lengths != NULL) {
                    lengths[found] = nativeLength;
                }
                if (cpLengths != NULL) {
                    cpLengths[found] = cpConsumed;
                }
                ++found;
            }
            if (step == USTRINGTRIE_FINAL_VALUE) {
                // No longer word can follow.
                break;
            }
        } else if (step == USTRINGTRIE_NO_MATCH) {
            break;
        }

        if (nativeLength >= maxLength) {
            break;
        }
    }

    if (prefix != NULL) {
        *prefix = cpConsumed;
    }
    return found;
}
|
||||
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
U_NAMESPACE_USE
|
||||
|
||||
/**
 * Swaps ICU dictionary data (data format "Dict", format version 1).
 *
 * The data header is swapped first, then the int32_t index array, then the
 * trie itself when it is a UChars trie (16-bit units); a bytes trie needs no
 * swapping.
 *
 * @param ds         Swapper providing the read/swap/print callbacks.
 * @param inData     Input data, starting at the data header.
 * @param length     Number of input bytes, or a negative value to only
 *                   compute the size (nothing is copied or swapped then).
 * @param outData    Output buffer; may be the same memory as inData.
 * @param pErrorCode In/out error code. Set to U_UNSUPPORTED_ERROR for an
 *                   unrecognized data format or trie type, and to
 *                   U_INDEX_OUTOFBOUNDS_ERROR when the input is too short or
 *                   its recorded sizes/offsets do not fit the data.
 * @return Header size plus the total size recorded in the indexes, or 0 on error.
 */
U_CAPI int32_t U_EXPORT2
udict_swap(const UDataSwapper *ds, const void *inData, int32_t length,
           void *outData, UErrorCode *pErrorCode) {
    const int32_t headerSize = udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
    if (pErrorCode == NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }

    // Check that this really is dictionary data: 0x44 0x69 0x63 0x74 is "Dict".
    const UDataInfo *pInfo = (const UDataInfo *)((const char *)inData + 4);
    if (!(pInfo->dataFormat[0] == 0x44 &&
          pInfo->dataFormat[1] == 0x69 &&
          pInfo->dataFormat[2] == 0x63 &&
          pInfo->dataFormat[3] == 0x74 &&
          pInfo->formatVersion[0] == 1)) {
        udata_printError(ds, "udict_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as dictionary data\n",
                         pInfo->dataFormat[0], pInfo->dataFormat[1], pInfo->dataFormat[2], pInfo->dataFormat[3], pInfo->formatVersion[0]);
        *pErrorCode = U_UNSUPPORTED_ERROR;
        return 0;
    }

    const uint8_t *inBytes = (const uint8_t *)inData + headerSize;
    uint8_t *outBytes = (uint8_t *)outData + headerSize;
    const int32_t *inIndexes = (const int32_t *)inBytes;

    int32_t indexes[DictionaryData::IX_COUNT];
    const int32_t indexesSize = (int32_t)sizeof(indexes);

    if (length >= 0) {
        length -= headerSize;
        if (length < indexesSize) {
            udata_printError(ds, "udict_swap(): too few bytes (%d after header) for dictionary data\n", length);
            *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
            return 0;
        }
    }

    // Read the indexes in the input's byte order.
    for (int32_t i = 0; i < DictionaryData::IX_COUNT; i++) {
        indexes[i] = udata_readInt32(ds, inIndexes[i]);
    }

    const int32_t size = indexes[DictionaryData::IX_TOTAL_SIZE];

    if (length >= 0) {
        // The total size comes from the data itself: reject values that cannot
        // even hold the index array (this also catches negative sizes) before
        // it is used as a copy length.
        if (size < indexesSize) {
            udata_printError(ds, "udict_swap(): invalid total size (%d) in dictionary data\n", size);
            *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
            return 0;
        }
        if (length < size) {
            udata_printError(ds, "udict_swap(): too few bytes (%d after header) for all of dictionary data\n", length);
            *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
            return 0;
        }

        // Copy everything first so that sections which need no swapping
        // still reach the output.
        if (inBytes != outBytes) {
            uprv_memcpy(outBytes, inBytes, size);
        }

        ds->swapArray32(ds, inBytes, indexesSize, outBytes, pErrorCode);

        const int32_t trieType = indexes[DictionaryData::IX_TRIE_TYPE] & DictionaryData::TRIE_TYPE_MASK;

        if (trieType == DictionaryData::TRIE_TYPE_UCHARS) {
            // The trie directly follows the indexes and ends at the next section offset.
            const int32_t trieStart = indexesSize;
            const int32_t trieLimit = indexes[DictionaryData::IX_RESERVED1_OFFSET];
            if (trieLimit < trieStart || size < trieLimit) {
                udata_printError(ds, "udict_swap(): trie end offset (%d) is outside of the dictionary data\n", trieLimit);
                *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
                return 0;
            }
            ds->swapArray16(ds, inBytes + trieStart, trieLimit - trieStart, outBytes + trieStart, pErrorCode);
        } else if (trieType == DictionaryData::TRIE_TYPE_BYTES) {
            // nothing to do
        } else {
            udata_printError(ds, "udict_swap(): unknown trie type!\n");
            *pErrorCode = U_UNSUPPORTED_ERROR;
            return 0;
        }

        // The two reserved sections that follow the trie are empty in the
        // current format, so there is nothing further to swap.
    }
    return headerSize + size;
}
|
||||
#endif
|
||||
189
UnicodeConverter/icubuilds-mac/icu/icu/common/dictionarydata.h
Normal file
189
UnicodeConverter/icubuilds-mac/icu/icu/common/dictionarydata.h
Normal file
@ -0,0 +1,189 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2014, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
* dictionarydata.h
|
||||
*
|
||||
* created on: 2012may31
|
||||
* created by: Markus W. Scherer & Maxime Serrano
|
||||
*/
|
||||
|
||||
#ifndef __DICTIONARYDATA_H__
|
||||
#define __DICTIONARYDATA_H__
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
#include "unicode/utext.h"
|
||||
#include "unicode/udata.h"
|
||||
#include "udataswp.h"
|
||||
#include "unicode/uobject.h"
|
||||
#include "unicode/ustringtrie.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
class UCharsTrie;
|
||||
class BytesTrie;
|
||||
|
||||
/**
 * Constants describing the layout of dictionary data: trie type flags,
 * transform specification fields, and the slots of the index array.
 */
class U_COMMON_API DictionaryData : public UMemory {
 public:
    // Trie type values and flags.
    static const int32_t TRIE_TYPE_BYTES;       // = 0;
    static const int32_t TRIE_TYPE_UCHARS;      // = 1;
    static const int32_t TRIE_TYPE_MASK;        // = 7;
    static const int32_t TRIE_HAS_VALUES;       // = 8;

    // Transform specification fields.
    static const int32_t TRANSFORM_NONE;        // = 0;
    static const int32_t TRANSFORM_TYPE_OFFSET; // = 0x1000000;
    static const int32_t TRANSFORM_TYPE_MASK;   // = 0x7f000000;
    static const int32_t TRANSFORM_OFFSET_MASK; // = 0x1fffff;

    // Slots of the index array.
    enum {
        // Byte offsets from the start of the data, after the generic header.
        IX_STRING_TRIE_OFFSET,
        IX_RESERVED1_OFFSET,
        IX_RESERVED2_OFFSET,
        IX_TOTAL_SIZE,

        // Trie type: TRIE_HAS_VALUES | TRIE_TYPE_BYTES etc.
        IX_TRIE_TYPE,
        // Transform specification: TRANSFORM_TYPE_OFFSET | 0xe00 etc.
        IX_TRANSFORM,

        IX_RESERVED6,
        IX_RESERVED7,
        // Number of index slots.
        IX_COUNT
    };
};
|
||||
|
||||
/**
|
||||
* Wrapper class around generic dictionaries, implementing matches().
|
||||
* getType() should return a TRIE_TYPE_??? constant from DictionaryData.
|
||||
*
|
||||
* All implementations of this interface must be thread-safe if they are to be used inside of the
|
||||
* dictionary-based break iteration code.
|
||||
*/
|
||||
class U_COMMON_API DictionaryMatcher : public UMemory {
public:
    DictionaryMatcher() {}
    virtual ~DictionaryMatcher();

    /**
     * Looks for dictionary words that begin at the current position of the
     * text. This should emulate CompactTrieDictionary::matches().
     *
     * @param text      The text in which to look for matching words. Matching
     *                  begins at the current position of the UText.
     * @param maxLength The max length of match to consider, in the native
     *                  indexing units of the UText.
     * @param limit     Capacity of the output arrays, which is also the
     *                  maximum number of matching words to be found.
     * @param lengths   Output array, filled with the lengths of the matches
     *                  in native indexing units of the UText, ordered from
     *                  shortest to longest. May be NULL.
     * @param cpLengths Output array, filled with the lengths of the matches
     *                  in Unicode code points, ordered from shortest to
     *                  longest. May be NULL.
     * @param values    Output array, filled with the values associated with
     *                  the words found. May be NULL.
     * @param prefix    Output parameter: the code point length of the prefix
     *                  match, even if that prefix did not lead to a complete
     *                  word. Always >= the cpLength of the longest complete
     *                  word matched. May be NULL.
     * @return Number of matching words found.
     */
    virtual int32_t matches(UText *text, int32_t maxLength, int32_t limit,
                            int32_t *lengths, int32_t *cpLengths, int32_t *values,
                            int32_t *prefix) const = 0;

    /** @return DictionaryData::TRIE_TYPE_XYZ */
    virtual int32_t getType() const = 0;
};
|
||||
|
||||
// Implementation of the DictionaryMatcher interface for a UCharsTrie dictionary
|
||||
class U_COMMON_API UCharsDictionaryMatcher : public DictionaryMatcher {
public:
    /**
     * Constructs a new UCharsDictionaryMatcher.
     * @param c the UChar data stored as 'characters'
     * @param f the UDataMemory; it will be closed on this object's destruction
     */
    UCharsDictionaryMatcher(const UChar *c, UDataMemory *f)
        : characters(c), file(f) { }
    virtual ~UCharsDictionaryMatcher();

    // DictionaryMatcher interface.
    virtual int32_t matches(UText *text, int32_t maxLength, int32_t limit,
                            int32_t *lengths, int32_t *cpLengths, int32_t *values,
                            int32_t *prefix) const;
    virtual int32_t getType() const;

private:
    const UChar *characters;
    UDataMemory *file;
};
|
||||
|
||||
// Implementation of the DictionaryMatcher interface for a BytesTrie dictionary
|
||||
class U_COMMON_API BytesDictionaryMatcher : public DictionaryMatcher {
public:
    /**
     * Constructs a new BytesDictionaryMatcher.
     * @param c the byte data stored as 'characters'
     * @param t the transform constant exactly as read from the file,
     *          not a masked version
     * @param f the UDataMemory; it will be closed on this object's destruction
     */
    BytesDictionaryMatcher(const char *c, int32_t t, UDataMemory *f)
        : characters(c), transformConstant(t), file(f) { }
    virtual ~BytesDictionaryMatcher();

    // DictionaryMatcher interface.
    virtual int32_t matches(UText *text, int32_t maxLength, int32_t limit,
                            int32_t *lengths, int32_t *cpLengths, int32_t *values,
                            int32_t *prefix) const;
    virtual int32_t getType() const;

private:
    UChar32 transform(UChar32 c) const;

    const char *characters;
    int32_t transformConstant;
    UDataMemory *file;
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
udict_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outData, UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Format of dictionary .dict data files.
|
||||
* Format version 1.0.
|
||||
*
|
||||
* A dictionary .dict data file contains a byte-serialized BytesTrie or
|
||||
* a UChars-serialized UCharsTrie.
|
||||
* Such files are used in dictionary-based break iteration (DBBI).
|
||||
*
|
||||
* For a BytesTrie, a transformation type is specified for
|
||||
* transforming Unicode strings into byte sequences.
|
||||
*
|
||||
* A .dict file begins with a standard ICU data file header
|
||||
* (DataHeader, see ucmndata.h and unicode/udata.h).
|
||||
* The UDataInfo.dataVersion field is currently unused (set to 0.0.0.0).
|
||||
*
|
||||
* After the header, the file contains the following parts.
|
||||
* Constants are defined in the DictionaryData class.
|
||||
*
|
||||
* For the data structure of BytesTrie & UCharsTrie see
|
||||
* http://site.icu-project.org/design/struct/tries
|
||||
* and the bytestrie.h and ucharstrie.h header files.
|
||||
*
|
||||
* int32_t indexes[indexesLength]; -- indexesLength=indexes[IX_STRING_TRIE_OFFSET]/4;
|
||||
*
|
||||
* The first four indexes are byte offsets in ascending order.
|
||||
* Each byte offset marks the start of the next part in the data file,
|
||||
* and the end of the previous one.
|
||||
* When two consecutive byte offsets are the same, then the corresponding part is empty.
|
||||
* Byte offsets are offsets from after the header,
|
||||
* that is, from the beginning of the indexes[].
|
||||
* Each part starts at an offset with proper alignment for its data.
|
||||
* If necessary, the previous part may include padding bytes to achieve this alignment.
|
||||
*
|
||||
* trieType=indexes[IX_TRIE_TYPE] defines the trie type.
|
||||
* transform=indexes[IX_TRANSFORM] defines the Unicode-to-bytes transformation.
|
||||
* If the transformation type is TRANSFORM_TYPE_OFFSET,
|
||||
* then the lower 21 bits contain the offset code point.
|
||||
* Each code point c is mapped to byte b = (c - offset).
|
||||
* Code points outside the range offset..(offset+0xff) cannot be mapped
|
||||
* and do not occur in the dictionary.
|
||||
*
|
||||
* stringTrie; -- a serialized BytesTrie or UCharsTrie
|
||||
*
|
||||
* The dictionary maps strings to specific values (TRIE_HAS_VALUES bit set in trieType),
|
||||
* or it maps all strings to 0 (TRIE_HAS_VALUES bit not set).
|
||||
*/
|
||||
|
||||
#endif /* !UCONFIG_NO_BREAK_ITERATION */
|
||||
#endif /* __DICTIONARYDATA_H__ */
|
||||
61
UnicodeConverter/icubuilds-mac/icu/icu/common/dtintrv.cpp
Normal file
61
UnicodeConverter/icubuilds-mac/icu/icu/common/dtintrv.cpp
Normal file
@ -0,0 +1,61 @@
|
||||
/*******************************************************************************
|
||||
* Copyright (C) 2008, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
*
|
||||
* File DTINTRV.CPP
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
|
||||
|
||||
#include "unicode/dtintrv.h"
|
||||
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(DateInterval)
|
||||
|
||||
//DateInterval::DateInterval(){}
|
||||
|
||||
|
||||
// Builds an interval from its two end points.
DateInterval::DateInterval(UDate from, UDate to) : fromDate(from), toDate(to) {
}
|
||||
|
||||
|
||||
// The destructor has no work of its own to do.
DateInterval::~DateInterval() {
}
|
||||
|
||||
|
||||
// Copy constructor: copies both end points of the other interval.
DateInterval::DateInterval(const DateInterval& other)
    : UObject(other),
      fromDate(other.fromDate),
      toDate(other.toDate) {
}
|
||||
|
||||
|
||||
// Assignment: copies both end points; self-assignment is a no-op.
DateInterval&
DateInterval::operator=(const DateInterval& other) {
    if ( this == &other ) {
        return *this;
    }
    fromDate = other.fromDate;
    toDate = other.toDate;
    return *this;
}
|
||||
|
||||
|
||||
// Returns a heap-allocated copy of this interval.
DateInterval*
DateInterval::clone() const {
    DateInterval *copy = new DateInterval(*this);
    return copy;
}
|
||||
|
||||
|
||||
// Two intervals are equal when both end points are equal.
UBool
DateInterval::operator==(const DateInterval& other) const {
    if ( fromDate != other.fromDate ) {
        return false;
    }
    return ( toDate == other.toDate );
}
|
||||
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
40
UnicodeConverter/icubuilds-mac/icu/icu/common/errorcode.cpp
Normal file
40
UnicodeConverter/icubuilds-mac/icu/icu/common/errorcode.cpp
Normal file
@ -0,0 +1,40 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2009-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: errorcode.cpp
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2009mar10
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/errorcode.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
// Out-of-line destructor with an empty body.
ErrorCode::~ErrorCode() {
}
|
||||
|
||||
// Sets the stored code back to U_ZERO_ERROR and returns the code that was
// stored before the reset.
UErrorCode ErrorCode::reset() {
    const UErrorCode previous = errorCode;
    errorCode = U_ZERO_ERROR;
    return previous;
}
|
||||
|
||||
void ErrorCode::assertSuccess() const {
|
||||
if(isFailure()) {
|
||||
handleFailure();
|
||||
}
|
||||
}
|
||||
|
||||
const char* ErrorCode::errorName() const {
|
||||
return u_errorName(errorCode);
|
||||
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
671
UnicodeConverter/icubuilds-mac/icu/icu/common/filteredbrk.cpp
Normal file
671
UnicodeConverter/icubuilds-mac/icu/icu/common/filteredbrk.cpp
Normal file
@ -0,0 +1,671 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2014-2015, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION
|
||||
|
||||
#include "cmemory.h"
|
||||
|
||||
#include "unicode/filteredbrk.h"
|
||||
#include "unicode/ucharstriebuilder.h"
|
||||
#include "unicode/ures.h"
|
||||
|
||||
#include "uresimp.h" // ures_getByKeyWithFallback
|
||||
#include "ubrkimpl.h" // U_ICUDATA_BRKITR
|
||||
#include "uvector.h"
|
||||
#include "cmemory.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
#ifndef FB_DEBUG
|
||||
#define FB_DEBUG 0
|
||||
#endif
|
||||
|
||||
#if FB_DEBUG
|
||||
#include <stdio.h>
|
||||
static void _fb_trace(const char *m, const UnicodeString *s, UBool b, int32_t d, const char *f, int l) {
|
||||
char buf[2048];
|
||||
if(s) {
|
||||
s->extract(0,s->length(),buf,2048);
|
||||
} else {
|
||||
strcpy(buf,"NULL");
|
||||
}
|
||||
fprintf(stderr,"%s:%d: %s. s='%s'(%p), b=%c, d=%d\n",
|
||||
f, l, m, buf, (const void*)s, b?'T':'F',(int)d);
|
||||
}
|
||||
|
||||
#define FB_TRACE(m,s,b,d) _fb_trace(m,s,b,d,__FILE__,__LINE__)
|
||||
#else
|
||||
#define FB_TRACE(m,s,b,d)
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Used with sortedInsert()
|
||||
*/
|
||||
static int8_t U_CALLCONV compareUnicodeString(UElement t1, UElement t2) {
    // Both elements carry a pointer to a UnicodeString.
    const UnicodeString *lhs = (const UnicodeString*)t1.pointer;
    const UnicodeString *rhs = (const UnicodeString*)t2.pointer;
    return lhs->compare(*rhs);
}
|
||||
|
||||
/**
|
||||
* A UVector which implements a set of strings.
|
||||
*/
|
||||
class U_COMMON_API UStringSet : public UVector {
 public:
  UStringSet(UErrorCode &status)
      : UVector(uprv_deleteUObject, uhash_compareUnicodeString, 1, status) {}
  virtual ~UStringSet();

  /**
   * Is this UnicodeString contained?
   */
  inline UBool contains(const UnicodeString& s) {
    return contains((void*) &s);
  }
  using UVector::contains;

  /**
   * Return the ith UnicodeString alias
   */
  inline const UnicodeString* getStringAt(int32_t i) const {
    return (const UnicodeString*)elementAt(i);
  }

  /**
   * Adopt the UnicodeString if not already contained.
   * Caller no longer owns the pointer in any case.
   * @return true if adopted successfully, false otherwise (error, or else duplicate)
   */
  inline UBool adopt(UnicodeString *str, UErrorCode &status) {
    if(U_SUCCESS(status) && !contains(*str)) {
      sortedInsert(str, compareUnicodeString, status);
      if(U_SUCCESS(status)) {
        return true;
      }
    }
    // Prior error, duplicate, or failed insertion: the string is released here.
    delete str;
    return false;
  }

  /**
   * Add by value: a heap copy of the string is made and handed to adopt().
   * @return true if successfully adopted.
   */
  inline UBool add(const UnicodeString& str, UErrorCode &status) {
    if(U_FAILURE(status)) return false;
    UnicodeString *copy = new UnicodeString(str);
    if(copy != NULL) {
      return adopt(copy, status);
    }
    status = U_MEMORY_ALLOCATION_ERROR;
    return false;
  }

  /**
   * Remove this string.
   * @return true if successfully removed, false otherwise (error, or else it wasn't there)
   */
  inline UBool remove(const UnicodeString &s, UErrorCode &status) {
    if(U_SUCCESS(status)) {
      return removeElement((void*) &s);
    }
    return false;
  }
};
|
||||
|
||||
/**
|
||||
* Virtual, won't be inlined
|
||||
*/
|
||||
UStringSet::~UStringSet() {
}
|
||||
|
||||
/* ----------------------------------------------------------- */
|
||||
|
||||
|
||||
/* Filtered Break constants */
|
||||
static const int32_t kPARTIAL = (1<<0); //< partial - need to run through forward trie
|
||||
static const int32_t kMATCH = (1<<1); //< exact match - skip this one.
|
||||
static const int32_t kSuppressInReverse = (1<<0);
|
||||
static const int32_t kAddToForward = (1<<1);
|
||||
static const UChar kFULLSTOP = 0x002E; // '.'
|
||||
|
||||
/**
|
||||
* Shared data for SimpleFilteredSentenceBreakIterator
|
||||
*/
|
||||
class SimpleFilteredSentenceBreakData : public UMemory {
|
||||
public:
|
||||
SimpleFilteredSentenceBreakData(UCharsTrie *forwards, UCharsTrie *backwards )
|
||||
: fForwardsPartialTrie(forwards), fBackwardsTrie(backwards), refcount(1) { }
|
||||
SimpleFilteredSentenceBreakData *incr() { refcount++; return this; }
|
||||
SimpleFilteredSentenceBreakData *decr() { if((--refcount) <= 0) delete this; return 0; }
|
||||
virtual ~SimpleFilteredSentenceBreakData();
|
||||
|
||||
LocalPointer<UCharsTrie> fForwardsPartialTrie; // Has ".a" for "a.M."
|
||||
LocalPointer<UCharsTrie> fBackwardsTrie; // i.e. ".srM" for Mrs.
|
||||
int32_t refcount;
|
||||
};
|
||||
|
||||
// Empty body; the LocalPointer members release the tries they hold.
SimpleFilteredSentenceBreakData::~SimpleFilteredSentenceBreakData() {
}
|
||||
|
||||
/**
|
||||
* Concrete implementation
|
||||
*/
|
||||
class SimpleFilteredSentenceBreakIterator : public BreakIterator {
|
||||
public:
|
||||
SimpleFilteredSentenceBreakIterator(BreakIterator *adopt, UCharsTrie *forwards, UCharsTrie *backwards, UErrorCode &status);
|
||||
SimpleFilteredSentenceBreakIterator(const SimpleFilteredSentenceBreakIterator& other);
|
||||
virtual ~SimpleFilteredSentenceBreakIterator();
|
||||
private:
|
||||
SimpleFilteredSentenceBreakData *fData;
|
||||
LocalPointer<BreakIterator> fDelegate;
|
||||
LocalUTextPointer fText;
|
||||
|
||||
/* -- subclass interface -- */
|
||||
public:
|
||||
/* -- cloning and other subclass stuff -- */
|
||||
virtual BreakIterator * createBufferClone(void * /*stackBuffer*/,
|
||||
int32_t &/*BufferSize*/,
|
||||
UErrorCode &status) {
|
||||
// for now - always deep clone
|
||||
status = U_SAFECLONE_ALLOCATED_WARNING;
|
||||
return clone();
|
||||
}
|
||||
virtual BreakIterator* clone(void) const { return new SimpleFilteredSentenceBreakIterator(*this); }
|
||||
virtual UClassID getDynamicClassID(void) const { return NULL; }
|
||||
virtual UBool operator==(const BreakIterator& o) const { if(this==&o) return true; return false; }
|
||||
|
||||
/* -- text modifying -- */
|
||||
virtual void setText(UText *text, UErrorCode &status) { fDelegate->setText(text,status); }
|
||||
virtual BreakIterator &refreshInputText(UText *input, UErrorCode &status) { fDelegate->refreshInputText(input,status); return *this; }
|
||||
virtual void adoptText(CharacterIterator* it) { fDelegate->adoptText(it); }
|
||||
virtual void setText(const UnicodeString &text) { fDelegate->setText(text); }
|
||||
|
||||
/* -- other functions that are just delegated -- */
|
||||
virtual UText *getUText(UText *fillIn, UErrorCode &status) const { return fDelegate->getUText(fillIn,status); }
|
||||
virtual CharacterIterator& getText(void) const { return fDelegate->getText(); }
|
||||
|
||||
/* -- ITERATION -- */
|
||||
virtual int32_t first(void);
|
||||
virtual int32_t preceding(int32_t offset);
|
||||
virtual int32_t previous(void);
|
||||
virtual UBool isBoundary(int32_t offset);
|
||||
virtual int32_t current(void) const { return fDelegate->current(); } // we keep the delegate current, so this should be correct.
|
||||
|
||||
virtual int32_t next(void);
|
||||
|
||||
virtual int32_t next(int32_t n);
|
||||
virtual int32_t following(int32_t offset);
|
||||
virtual int32_t last(void);
|
||||
|
||||
private:
|
||||
/**
|
||||
* Given that the fDelegate has already given its "initial" answer,
|
||||
* find the NEXT actual (non-excepted) break.
|
||||
* @param n initial position from delegate
|
||||
* @return new break position or UBRK_DONE
|
||||
*/
|
||||
int32_t internalNext(int32_t n);
|
||||
/**
|
||||
* Given that the fDelegate has already given its "initial" answer,
|
||||
* find the PREV actual (non-excepted) break.
|
||||
* @param n initial position from delegate
|
||||
* @return new break position or UBRK_DONE
|
||||
*/
|
||||
int32_t internalPrev(int32_t n);
|
||||
/**
|
||||
* set up the UText with the value of the fDelegate.
|
||||
* Call this before calling breakExceptionAt.
|
||||
* May be able to avoid excess calls
|
||||
*/
|
||||
void resetState(UErrorCode &status);
|
||||
/**
|
||||
* Is there a match (exception) at this spot?
|
||||
*/
|
||||
enum EFBMatchResult { kNoExceptionHere, kExceptionHere };
|
||||
/**
|
||||
* Determine if there is an exception at this spot
|
||||
* @param n spot to check
|
||||
* @return kNoExceptionHere or kExceptionHere
|
||||
**/
|
||||
enum EFBMatchResult breakExceptionAt(int32_t n);
|
||||
};
|
||||
|
||||
// Copy constructor: takes another reference on the other iterator's trie
// data (incr()) and clones its delegate iterator.
SimpleFilteredSentenceBreakIterator::SimpleFilteredSentenceBreakIterator(const SimpleFilteredSentenceBreakIterator& other)
  : BreakIterator(other),
    fData(other.fData->incr()),
    fDelegate(other.fDelegate->clone())
{
}
|
||||
|
||||
|
||||
// Wraps the adopted iterator: the base class is initialized with the
// delegate's valid and actual locales, and the two tries are stored in a
// newly allocated SimpleFilteredSentenceBreakData.
SimpleFilteredSentenceBreakIterator::SimpleFilteredSentenceBreakIterator(BreakIterator *adopt, UCharsTrie *forwards, UCharsTrie *backwards, UErrorCode &status)
  : BreakIterator(adopt->getLocale(ULOC_VALID_LOCALE,status),
                  adopt->getLocale(ULOC_ACTUAL_LOCALE,status)),
    fData(new SimpleFilteredSentenceBreakData(forwards, backwards)),
    fDelegate(adopt)
{
  // all set..
}
|
||||
|
||||
// Gives up this iterator's reference on the trie data; decr() returns 0, so
// fData is cleared at the same time.
SimpleFilteredSentenceBreakIterator::~SimpleFilteredSentenceBreakIterator() {
  fData = fData->decr();
}
|
||||
|
||||
// Refreshes fText from the delegate: the UText currently held is released
// from fText, offered to getUText() as the fill-in, and the result is stored
// back in fText.
void SimpleFilteredSentenceBreakIterator::resetState(UErrorCode &status) {
  UText *recycled = fText.orphan();
  UText *refreshed = fDelegate->getUText(recycled, status);
  fText.adoptInstead(refreshed);
}
|
||||
|
||||
// Decides whether the candidate break at native index n is suppressed.
// The text before n is fed backwards into the backwards trie; a kMATCH value
// suppresses the break outright, while a kPARTIAL value is confirmed by
// running the text forwards from the matched position through the forwards
// trie. resetState() must have been called first, and the backwards trie
// must be non-null.
SimpleFilteredSentenceBreakIterator::EFBMatchResult
SimpleFilteredSentenceBreakIterator::breakExceptionAt(int32_t n) {
  UText *text = fText.getAlias();
  int64_t bestPosn = -1;
  int32_t bestValue = -1;

  utext_setNativeIndex(text, n); // from n..
  fData->fBackwardsTrie->reset();
  UChar32 uch;

  // Assume a space is following the '.'  (so we handle the case:  "Mr. /Brown")
  // TODO: skip a class of chars here??  TODO only do this the 1st time?
  uch = utext_previous32(text);
  if(uch != (UChar32)0x0020) {
    // Not a space: move forward again.
    uch = utext_next32(text);
  }

  // Walk backwards while there is more text and more in the trie,
  // remembering the best match so far.
  // NOTE(review): r starts as USTRINGTRIE_INTERMEDIATE_VALUE, and the
  // USTRINGTRIE_MATCHES() test below looks like it also accepts results that
  // carry no value, so getValue() may be reached without one - confirm
  // against ustringtrie.h.
  UStringTrieResult r = USTRINGTRIE_INTERMEDIATE_VALUE;
  for(;;) {
    uch = utext_previous32(text);
    if(uch == U_SENTINEL) {
      break; // nothing more to consume backwards
    }
    r = fData->fBackwardsTrie->nextForCodePoint(uch);
    if(!USTRINGTRIE_HAS_NEXT(r)) {
      break; // nothing more in the trie
    }
    if(USTRINGTRIE_HAS_VALUE(r)) {
      bestPosn = utext_getNativeIndex(text);
      bestValue = fData->fBackwardsTrie->getValue();
    }
  }

  if(USTRINGTRIE_MATCHES(r)) { // exact match?
    bestValue = fData->fBackwardsTrie->getValue();
    bestPosn = utext_getNativeIndex(text);
  }

  if(bestPosn < 0) {
    return kNoExceptionHere; // No match - so exit. Not an exception.
  }
  if(bestValue == kMATCH) { // exact match!
    return kExceptionHere; // See if the next is another exception.
  }
  if(bestValue != kPARTIAL || !fData->fForwardsPartialTrie.isValid()) {
    return kNoExceptionHere; // internal error and/or no forwards trie
  }

  // We matched the "Ph." in "Ph.D." - now we need to run everything through
  // the forwards trie to see if it matches something going forward.
  fData->fForwardsPartialTrie->reset();
  UStringTrieResult rfwd = USTRINGTRIE_INTERMEDIATE_VALUE;
  utext_setNativeIndex(text, bestPosn); // hope that's close ..
  for(;;) {
    uch = utext_next32(text);
    if(uch == U_SENTINEL) {
      break;
    }
    rfwd = fData->fForwardsPartialTrie->nextForCodePoint(uch);
    if(!USTRINGTRIE_HAS_NEXT(rfwd)) {
      break;
    }
  }

  // only full matches here, nothing to check: a forward match means the
  // break is skipped; otherwise return the 'underlying' break.
  if(USTRINGTRIE_MATCHES(rfwd)) {
    return kExceptionHere;
  }
  return kNoExceptionHere;
}
|
||||
|
||||
// the workhorse single next.
|
||||
// Starting from the delegate's answer n, keeps advancing the delegate while
// the current position is a suppressed break. Returns n unchanged when it is
// UBRK_DONE or when no backwards trie is loaded, and UBRK_DONE when the text
// cannot be refreshed.
int32_t
SimpleFilteredSentenceBreakIterator::internalNext(int32_t n) {
  if(n == UBRK_DONE) {
    return n; // at end
  }
  if(fData->fBackwardsTrie.isNull()) {
    return n; // no backwards table loaded == no exceptions
  }

  // refresh text
  UErrorCode status = U_ZERO_ERROR;
  resetState(status);
  if(U_FAILURE(status)) return UBRK_DONE; // bail out
  int64_t utextLen = utext_nativeLength(fText.getAlias());

  // Runs once per underlying break (from fDelegate); the end of the text is
  // never tested for an exception.
  while (n != UBRK_DONE && n != utextLen && breakExceptionAt(n) == kExceptionHere) {
    n = fDelegate->next(); // skip this one. Find the next lowerlevel break.
  }
  return n;
}
|
||||
|
||||
// Starting from the delegate's answer n, keeps moving the delegate backwards
// while the current position is a suppressed break. Returns n unchanged when
// it is 0 or UBRK_DONE or when no backwards trie is loaded, and UBRK_DONE
// when the text cannot be refreshed.
int32_t
SimpleFilteredSentenceBreakIterator::internalPrev(int32_t n) {
  if(n == 0 || n == UBRK_DONE) {
    return n; // at end
  }
  if(fData->fBackwardsTrie.isNull()) {
    return n; // no backwards table loaded == no exceptions
  }

  // refresh text
  UErrorCode status = U_ZERO_ERROR;
  resetState(status);
  if(U_FAILURE(status)) return UBRK_DONE; // bail out

  // Runs once per underlying break (from fDelegate); position 0 is never
  // tested for an exception.
  while (n != UBRK_DONE && n != 0 && breakExceptionAt(n) == kExceptionHere) {
    n = fDelegate->previous(); // skip this one. Find the next lowerlevel break.
  }
  return n;
}
|
||||
|
||||
|
||||
// Advances the delegate, then skips forward past suppressed breaks.
int32_t
SimpleFilteredSentenceBreakIterator::next() {
  const int32_t candidate = fDelegate->next();
  return internalNext(candidate);
}
|
||||
|
||||
// Moves the delegate to its first boundary and filters it with internalNext().
int32_t
SimpleFilteredSentenceBreakIterator::first(void) {
  const int32_t candidate = fDelegate->first();
  return internalNext(candidate);
}
|
||||
|
||||
// Asks the delegate for the boundary preceding offset, then skips backwards
// past suppressed breaks.
int32_t
SimpleFilteredSentenceBreakIterator::preceding(int32_t offset) {
  const int32_t candidate = fDelegate->preceding(offset);
  return internalPrev(candidate);
}
|
||||
|
||||
// Moves the delegate back one boundary, then skips backwards past suppressed
// breaks.
int32_t
SimpleFilteredSentenceBreakIterator::previous(void) {
  const int32_t candidate = fDelegate->previous();
  return internalPrev(candidate);
}
|
||||
|
||||
// Reports whether offset is a boundary once suppressions are applied.
// @param offset position to test, passed to the delegate's isBoundary()
// @return false when the delegate reports no boundary there, or when the
//         boundary is suppressed by an exception; true otherwise.
UBool SimpleFilteredSentenceBreakIterator::isBoundary(int32_t offset) {
  if(!fDelegate->isBoundary(offset)) return false; // no break to suppress

  // No backwards table loaded == no exceptions. breakExceptionAt()
  // dereferences the backwards trie unconditionally, so it must not be called
  // in that case (internalNext()/internalPrev() apply the same guard).
  if(fData->fBackwardsTrie.isNull()) return true;

  UErrorCode status = U_ZERO_ERROR;
  resetState(status);
  // Without the refreshed text no exception can be looked up; keep the
  // delegate's answer instead of reading through an unusable UText.
  if(U_FAILURE(status)) return true;

  SimpleFilteredSentenceBreakIterator::EFBMatchResult m = breakExceptionAt(offset);

  switch(m) {
  case kExceptionHere:
    return false;
  default:
  case kNoExceptionHere:
    return true;
  }
}
|
||||
|
||||
// Forwards the argument to the delegate's next(int32_t), then skips forward
// past suppressed breaks.
int32_t
SimpleFilteredSentenceBreakIterator::next(int32_t offset) {
  const int32_t candidate = fDelegate->next(offset);
  return internalNext(candidate);
}
|
||||
|
||||
// Asks the delegate for the boundary following offset, then skips forward
// past suppressed breaks.
int32_t
SimpleFilteredSentenceBreakIterator::following(int32_t offset) {
  const int32_t candidate = fDelegate->following(offset);
  return internalNext(candidate);
}
|
||||
|
||||
// Returns the delegate's last boundary unfiltered.
int32_t
SimpleFilteredSentenceBreakIterator::last(void) {
  // Don't suppress a break opportunity at the end of text.
  const int32_t end = fDelegate->last();
  return end;
}
|
||||
|
||||
|
||||
/**
|
||||
* Concrete implementation of builder class.
|
||||
*/
|
||||
class U_COMMON_API SimpleFilteredBreakIteratorBuilder : public FilteredBreakIteratorBuilder {
public:
  virtual ~SimpleFilteredBreakIteratorBuilder();
  // Builder pre-loaded with the sentence-break exceptions of a locale.
  SimpleFilteredBreakIteratorBuilder(const Locale &fromLocale, UErrorCode &status);
  // Builder that starts with an empty exception set.
  SimpleFilteredBreakIteratorBuilder(UErrorCode &status);

  // Adds / removes one exception string in the set.
  virtual UBool suppressBreakAfter(const UnicodeString& exception, UErrorCode& status);
  virtual UBool unsuppressBreakAfter(const UnicodeString& exception, UErrorCode& status);

  virtual BreakIterator *build(BreakIterator* adoptBreakIterator, UErrorCode& status);

private:
  UStringSet fSet; // the exception strings collected so far
};
|
||||
|
||||
// Empty body; the fSet member is destroyed automatically.
SimpleFilteredBreakIteratorBuilder::~SimpleFilteredBreakIteratorBuilder() {
}
|
||||
|
||||
// Creates a builder whose exception set is empty.
SimpleFilteredBreakIteratorBuilder::SimpleFilteredBreakIteratorBuilder(UErrorCode &status)
  : fSet(status) {
}
|
||||
|
||||
// Creates a builder and loads every string found under
// "exceptions"/"SentenceBreak" in the break-iterator resource bundle of
// fromLocale's base name. If the resources cannot be opened, status carries
// the failure and the builder is left empty.
SimpleFilteredBreakIteratorBuilder::SimpleFilteredBreakIteratorBuilder(const Locale &fromLocale, UErrorCode &status)
  : fSet(status)
{
  if(U_FAILURE(status)) {
    return;
  }

  LocalUResourceBundlePointer b(ures_open(U_ICUDATA_BRKITR, fromLocale.getBaseName(), &status));
  LocalUResourceBundlePointer exceptions(ures_getByKeyWithFallback(b.getAlias(), "exceptions", NULL, &status));
  LocalUResourceBundlePointer breaks(ures_getByKeyWithFallback(exceptions.getAlias(), "SentenceBreak", NULL, &status));
  if(U_FAILURE(status)) return; // leaves the builder empty, if you try to use it.

  // The iteration uses its own status so that its terminating error does not
  // leak into the caller's status.
  LocalUResourceBundlePointer strs;
  UErrorCode subStatus = status;
  for(;;) {
    strs.adoptInstead(ures_getNextResource(breaks.getAlias(), strs.orphan(), &subStatus));
    if(!strs.isValid() || U_FAILURE(subStatus)) {
      break;
    }
    UnicodeString str(ures_getUnicodeString(strs.getAlias(), &status));
    suppressBreakAfter(str, status); // load the string
  }

  // Report an iteration failure only if it is not the index-out-of-bounds
  // code and the caller's status is still a success.
  if(U_FAILURE(subStatus)&&subStatus!=U_INDEX_OUTOFBOUNDS_ERROR&&U_SUCCESS(status)) {
    status = subStatus;
  }
}
|
||||
|
||||
// Adds the exception string to the set and returns the result of
// UStringSet::add().
UBool
SimpleFilteredBreakIteratorBuilder::suppressBreakAfter(const UnicodeString& exception, UErrorCode& status)
{
  const UBool added = fSet.add(exception, status);
  FB_TRACE("suppressBreakAfter",&exception,added,0);
  return added;
}
|
||||
|
||||
// Removes the exception string from the set and returns the result of
// UStringSet::remove().
UBool
SimpleFilteredBreakIteratorBuilder::unsuppressBreakAfter(const UnicodeString& exception, UErrorCode& status)
{
  const UBool removed = fSet.remove(exception, status);
  FB_TRACE("unsuppressBreakAfter",&exception,removed,0);
  return removed;
}
|
||||
|
||||
/**
|
||||
* Jitterbug 2974: MSVC has a bug whereby new X[0] behaves badly.
|
||||
* Work around this.
|
||||
*
|
||||
* Note: "new UnicodeString[subCount]" ends up calling global operator new
|
||||
* on MSVC2012 for some reason.
|
||||
*/
|
||||
static inline UnicodeString* newUnicodeStringArray(size_t count) {
    // A request for zero elements is served with an array of one.
    const size_t allocCount = (count == 0) ? 1 : count;
    return new UnicodeString[allocCount];
}
|
||||
|
||||
BreakIterator *
|
||||
SimpleFilteredBreakIteratorBuilder::build(BreakIterator* adoptBreakIterator, UErrorCode& status) {
|
||||
LocalPointer<BreakIterator> adopt(adoptBreakIterator);
|
||||
|
||||
LocalPointer<UCharsTrieBuilder> builder(new UCharsTrieBuilder(status), status);
|
||||
LocalPointer<UCharsTrieBuilder> builder2(new UCharsTrieBuilder(status), status);
|
||||
if(U_FAILURE(status)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
int32_t revCount = 0;
|
||||
int32_t fwdCount = 0;
|
||||
|
||||
int32_t subCount = fSet.size();
|
||||
|
||||
UnicodeString *ustrs_ptr = newUnicodeStringArray(subCount);
|
||||
|
||||
LocalArray<UnicodeString> ustrs(ustrs_ptr);
|
||||
|
||||
LocalMemory<int> partials;
|
||||
partials.allocateInsteadAndReset(subCount);
|
||||
|
||||
LocalPointer<UCharsTrie> backwardsTrie; // i.e. ".srM" for Mrs.
|
||||
LocalPointer<UCharsTrie> forwardsPartialTrie; // Has ".a" for "a.M."
|
||||
|
||||
int n=0;
|
||||
for ( int32_t i = 0;
|
||||
i<fSet.size();
|
||||
i++) {
|
||||
const UnicodeString *abbr = fSet.getStringAt(i);
|
||||
if(abbr) {
|
||||
FB_TRACE("build",abbr,TRUE,i);
|
||||
ustrs[n] = *abbr; // copy by value
|
||||
FB_TRACE("ustrs[n]",&ustrs[n],TRUE,i);
|
||||
} else {
|
||||
FB_TRACE("build",abbr,FALSE,i);
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return NULL;
|
||||
}
|
||||
partials[n] = 0; // default: not partial
|
||||
n++;
|
||||
}
|
||||
// first pass - find partials.
|
||||
for(int i=0;i<subCount;i++) {
|
||||
int nn = ustrs[i].indexOf(kFULLSTOP); // TODO: non-'.' abbreviations
|
||||
if(nn>-1 && (nn+1)!=ustrs[i].length()) {
|
||||
FB_TRACE("partial",&ustrs[i],FALSE,i);
|
||||
// is partial.
|
||||
// is it unique?
|
||||
int sameAs = -1;
|
||||
for(int j=0;j<subCount;j++) {
|
||||
if(j==i) continue;
|
||||
if(ustrs[i].compare(0,nn+1,ustrs[j],0,nn+1)==0) {
|
||||
FB_TRACE("prefix",&ustrs[j],FALSE,nn+1);
|
||||
//UBool otherIsPartial = ((nn+1)!=ustrs[j].length()); // true if ustrs[j] doesn't end at nn
|
||||
if(partials[j]==0) { // hasn't been processed yet
|
||||
partials[j] = kSuppressInReverse | kAddToForward;
|
||||
FB_TRACE("suppressing",&ustrs[j],FALSE,j);
|
||||
} else if(partials[j] & kSuppressInReverse) {
|
||||
sameAs = j; // the other entry is already in the reverse table.
|
||||
}
|
||||
}
|
||||
}
|
||||
FB_TRACE("for partial same-",&ustrs[i],FALSE,sameAs);
|
||||
FB_TRACE(" == partial #",&ustrs[i],FALSE,partials[i]);
|
||||
UnicodeString prefix(ustrs[i], 0, nn+1);
|
||||
if(sameAs == -1 && partials[i] == 0) {
|
||||
// first one - add the prefix to the reverse table.
|
||||
prefix.reverse();
|
||||
builder->add(prefix, kPARTIAL, status);
|
||||
revCount++;
|
||||
FB_TRACE("Added partial",&prefix,FALSE, i);
|
||||
FB_TRACE(u_errorName(status),&ustrs[i],FALSE,i);
|
||||
partials[i] = kSuppressInReverse | kAddToForward;
|
||||
} else {
|
||||
FB_TRACE("NOT adding partial",&prefix,FALSE, i);
|
||||
FB_TRACE(u_errorName(status),&ustrs[i],FALSE,i);
|
||||
}
|
||||
}
|
||||
}
|
||||
for(int i=0;i<subCount;i++) {
|
||||
if(partials[i]==0) {
|
||||
ustrs[i].reverse();
|
||||
builder->add(ustrs[i], kMATCH, status);
|
||||
revCount++;
|
||||
FB_TRACE(u_errorName(status), &ustrs[i], FALSE, i);
|
||||
} else {
|
||||
FB_TRACE("Adding fwd",&ustrs[i], FALSE, i);
|
||||
|
||||
// an optimization would be to only add the portion after the '.'
|
||||
// for example, for "Ph.D." we store ".hP" in the reverse table. We could just store "D." in the forward,
|
||||
// instead of "Ph.D." since we already know the "Ph." part is a match.
|
||||
// would need the trie to be able to hold 0-length strings, though.
|
||||
builder2->add(ustrs[i], kMATCH, status); // forward
|
||||
fwdCount++;
|
||||
//ustrs[i].reverse();
|
||||
////if(debug2) u_printf("SUPPRESS- not Added(%d): /%S/ status=%s\n",partials[i], ustrs[i].getTerminatedBuffer(), u_errorName(status));
|
||||
}
|
||||
}
|
||||
FB_TRACE("AbbrCount",NULL,FALSE, subCount);
|
||||
|
||||
if(revCount>0) {
|
||||
backwardsTrie.adoptInstead(builder->build(USTRINGTRIE_BUILD_FAST, status));
|
||||
if(U_FAILURE(status)) {
|
||||
FB_TRACE(u_errorName(status),NULL,FALSE, -1);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
if(fwdCount>0) {
|
||||
forwardsPartialTrie.adoptInstead(builder2->build(USTRINGTRIE_BUILD_FAST, status));
|
||||
if(U_FAILURE(status)) {
|
||||
FB_TRACE(u_errorName(status),NULL,FALSE, -1);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
return new SimpleFilteredSentenceBreakIterator(adopt.orphan(), forwardsPartialTrie.orphan(), backwardsTrie.orphan(), status);
|
||||
}
|
||||
|
||||
|
||||
// ----------- Base class implementation
|
||||
|
||||
/** Base-class constructor; there is nothing to initialize here. */
FilteredBreakIteratorBuilder::FilteredBreakIteratorBuilder() {}
|
||||
|
||||
/** Base-class destructor; there is nothing to release here. */
FilteredBreakIteratorBuilder::~FilteredBreakIteratorBuilder() {}
|
||||
|
||||
/**
 * Creates a SimpleFilteredBreakIteratorBuilder for the given locale.
 *
 * @param where  locale handed to the SimpleFilteredBreakIteratorBuilder constructor
 * @param status in/out error code; nothing is created if it already signals failure
 * @return the new builder (caller owns it), or NULL if status signals failure
 */
FilteredBreakIteratorBuilder *
FilteredBreakIteratorBuilder::createInstance(const Locale& where, UErrorCode& status) {
  if(U_FAILURE(status)) {
    return NULL;
  }
  LocalPointer<FilteredBreakIteratorBuilder> builder(
      new SimpleFilteredBreakIteratorBuilder(where, status), status);
  if(U_FAILURE(status)) {
    // Ownership stays with the LocalPointer on this path.
    return NULL;
  }
  return builder.orphan();
}
|
||||
|
||||
/**
 * Creates a SimpleFilteredBreakIteratorBuilder using its status-only constructor.
 *
 * @param status in/out error code; nothing is created if it already signals failure
 * @return the new builder (caller owns it), or NULL if status signals failure
 */
FilteredBreakIteratorBuilder *
FilteredBreakIteratorBuilder::createInstance(UErrorCode& status) {
  if(U_FAILURE(status)) {
    return NULL;
  }
  LocalPointer<FilteredBreakIteratorBuilder> builder(
      new SimpleFilteredBreakIteratorBuilder(status), status);
  if(U_FAILURE(status)) {
    // Ownership stays with the LocalPointer on this path.
    return NULL;
  }
  return builder.orphan();
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif //#if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING && !UCONFIG_NO_FILTERED_BREAK_ITERATION
|
||||
@ -0,0 +1,288 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2009-2012, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: filterednormalizer2.cpp
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2009dec10
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_NORMALIZATION
|
||||
|
||||
#include "unicode/normalizer2.h"
|
||||
#include "unicode/uniset.h"
|
||||
#include "unicode/unistr.h"
|
||||
#include "unicode/unorm.h"
|
||||
#include "cpputils.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
// Out-of-line destructor; the body is intentionally empty.
FilteredNormalizer2::~FilteredNormalizer2() {
}
|
||||
|
||||
// Public entry point: validates the arguments, empties dest, then delegates to
// the internal span-driven overload starting with USET_SPAN_SIMPLE.
// On a pre-existing/buffer error dest is set to bogus; if dest aliases src the
// call fails with U_ILLEGAL_ARGUMENT_ERROR and dest is left untouched.
UnicodeString &
FilteredNormalizer2::normalize(const UnicodeString &src,
                               UnicodeString &dest,
                               UErrorCode &errorCode) const {
    uprv_checkCanGetBuffer(src, errorCode);
    if(U_SUCCESS(errorCode)) {
        if(&dest!=&src) {
            dest.remove();
            return normalize(src, dest, USET_SPAN_SIMPLE, errorCode);
        }
        errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    } else {
        dest.setToBogus();
    }
    return dest;
}
|
||||
|
||||
// Internal: No argument checking, and appends to dest.
|
||||
// Pass as input spanCondition the one that is likely to yield a non-zero
|
||||
// span length at the start of src.
|
||||
// For set=[:age=3.2:], since almost all common characters were in Unicode 3.2,
|
||||
// USET_SPAN_SIMPLE should be passed in for the start of src
|
||||
// and USET_SPAN_NOT_CONTAINED should be passed in if we continue after
|
||||
// an in-filter prefix.
|
||||
UnicodeString &
FilteredNormalizer2::normalize(const UnicodeString &src,
                               UnicodeString &dest,
                               USetSpanCondition spanCondition,
                               UErrorCode &errorCode) const {
    // Scratch output for norm2, declared outside the loop so its buffer is reused.
    UnicodeString scratch;
    int32_t start=0;
    while(start<src.length()) {
        const int32_t limit=set.span(src, start, spanCondition);
        const int32_t length=limit-start;
        if(spanCondition==USET_SPAN_NOT_CONTAINED) {
            // Outside the filter set: copy the span through unchanged.
            if(length!=0) {
                dest.append(src, start, length);
            }
            spanCondition=USET_SPAN_SIMPLE;
        } else {
            // Inside the filter set: normalize into scratch and append the result,
            // so the already-written part of dest is never modified by norm2.
            if(length!=0) {
                dest.append(norm2.normalize(src.tempSubStringBetween(start, limit),
                                            scratch, errorCode));
                if(U_FAILURE(errorCode)) {
                    break;
                }
            }
            spanCondition=USET_SPAN_NOT_CONTAINED;
        }
        start=limit;
    }
    return dest;
}
|
||||
|
||||
// Forwards to the four-argument overload with doNormalize=TRUE.
UnicodeString &
FilteredNormalizer2::normalizeSecondAndAppend(UnicodeString &first,
                                              const UnicodeString &second,
                                              UErrorCode &errorCode) const {
    const UBool doNormalize=TRUE;
    return normalizeSecondAndAppend(first, second, doNormalize, errorCode);
}
|
||||
|
||||
// Forwards to the four-argument normalizeSecondAndAppend() with doNormalize=FALSE.
UnicodeString &
FilteredNormalizer2::append(UnicodeString &first,
                            const UnicodeString &second,
                            UErrorCode &errorCode) const {
    const UBool doNormalize=FALSE;
    return normalizeSecondAndAppend(first, second, doNormalize, errorCode);
}
|
||||
|
||||
// Shared implementation of normalizeSecondAndAppend() (doNormalize=TRUE) and
// append() (doNormalize=FALSE).
// Fails with U_ILLEGAL_ARGUMENT_ERROR when first and second are the same object.
// Always returns first.
UnicodeString &
FilteredNormalizer2::normalizeSecondAndAppend(UnicodeString &first,
                                              const UnicodeString &second,
                                              UBool doNormalize,
                                              UErrorCode &errorCode) const {
    uprv_checkCanGetBuffer(first, errorCode);
    uprv_checkCanGetBuffer(second, errorCode);
    if(U_FAILURE(errorCode)) {
        return first;
    }
    if(&first==&second) {
        errorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return first;
    }
    if(first.isEmpty()) {
        // Nothing to merge with: either copy second or normalize it into first.
        if(!doNormalize) {
            return first=second;
        }
        return normalize(second, first, errorCode);
    }

    // Merge the in-filter suffix of first with the in-filter prefix of second.
    const int32_t prefixLimit=set.span(second, 0, USET_SPAN_SIMPLE);
    if(prefixLimit!=0) {
        UnicodeString prefix(second.tempSubString(0, prefixLimit));
        const int32_t suffixStart=set.spanBack(first, INT32_MAX, USET_SPAN_SIMPLE);
        if(suffixStart!=0) {
            // Only the tail of first is in the filter: work on a copy of that
            // tail and splice the merged result back in.
            UnicodeString middle(first, suffixStart, INT32_MAX);
            if(doNormalize) {
                norm2.normalizeSecondAndAppend(middle, prefix, errorCode);
            } else {
                norm2.append(middle, prefix, errorCode);
            }
            first.replace(suffixStart, INT32_MAX, middle);
        } else {
            // All of first is in the filter: let norm2 append in place.
            if(doNormalize) {
                norm2.normalizeSecondAndAppend(first, prefix, errorCode);
            } else {
                norm2.append(first, prefix, errorCode);
            }
        }
    }

    // Whatever follows the in-filter prefix of second starts outside the filter.
    if(prefixLimit<second.length()) {
        UnicodeString rest(second.tempSubString(prefixLimit, INT32_MAX));
        if(doNormalize) {
            normalize(rest, first, USET_SPAN_NOT_CONTAINED, errorCode);
        } else {
            first.append(rest);
        }
    }
    return first;
}
|
||||
|
||||
// Returns FALSE for code points outside the filter set without consulting norm2;
// otherwise reports whether norm2.getDecomposition() succeeded.
UBool
FilteredNormalizer2::getDecomposition(UChar32 c, UnicodeString &decomposition) const {
    if(!set.contains(c)) {
        return FALSE;
    }
    return norm2.getDecomposition(c, decomposition) ? TRUE : FALSE;
}
|
||||
|
||||
// Returns FALSE for code points outside the filter set without consulting norm2;
// otherwise reports whether norm2.getRawDecomposition() succeeded.
UBool
FilteredNormalizer2::getRawDecomposition(UChar32 c, UnicodeString &decomposition) const {
    if(!set.contains(c)) {
        return FALSE;
    }
    return norm2.getRawDecomposition(c, decomposition) ? TRUE : FALSE;
}
|
||||
|
||||
// Delegates to norm2.composePair() only when both code points are in the
// filter set; otherwise returns U_SENTINEL.
UChar32
FilteredNormalizer2::composePair(UChar32 a, UChar32 b) const {
    if(!set.contains(a)) {
        return U_SENTINEL;
    }
    if(!set.contains(b)) {
        return U_SENTINEL;
    }
    return norm2.composePair(a, b);
}
|
||||
|
||||
// Code points outside the filter set report combining class 0; all others
// are looked up in norm2.
uint8_t
FilteredNormalizer2::getCombiningClass(UChar32 c) const {
    if(set.contains(c)) {
        return norm2.getCombiningClass(c);
    }
    return 0;
}
|
||||
|
||||
// Walks s in alternating in-filter / out-of-filter spans (starting with
// USET_SPAN_SIMPLE) and asks norm2 about each in-filter span.
// Returns FALSE on error or as soon as one in-filter span is not normalized.
UBool
FilteredNormalizer2::isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
    uprv_checkCanGetBuffer(s, errorCode);
    if(U_FAILURE(errorCode)) {
        return FALSE;
    }
    USetSpanCondition condition=USET_SPAN_SIMPLE;
    int32_t start=0;
    while(start<s.length()) {
        const int32_t limit=set.span(s, start, condition);
        if(condition==USET_SPAN_NOT_CONTAINED) {
            // Out-of-filter text is never checked.
            condition=USET_SPAN_SIMPLE;
        } else {
            if(!norm2.isNormalized(s.tempSubStringBetween(start, limit), errorCode)) {
                return FALSE;
            }
            if(U_FAILURE(errorCode)) {
                return FALSE;
            }
            condition=USET_SPAN_NOT_CONTAINED;
        }
        start=limit;
    }
    return TRUE;
}
|
||||
|
||||
// Runs norm2.quickCheck() on every in-filter span of s and combines the answers:
// an error or UNORM_NO returns that span's result immediately, UNORM_MAYBE is
// remembered, and UNORM_YES is returned only if no span said otherwise.
// Returns UNORM_MAYBE when errorCode already signals failure.
UNormalizationCheckResult
FilteredNormalizer2::quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
    uprv_checkCanGetBuffer(s, errorCode);
    if(U_FAILURE(errorCode)) {
        return UNORM_MAYBE;
    }
    UNormalizationCheckResult overall=UNORM_YES;
    USetSpanCondition condition=USET_SPAN_SIMPLE;
    int32_t start=0;
    while(start<s.length()) {
        const int32_t limit=set.span(s, start, condition);
        if(condition==USET_SPAN_NOT_CONTAINED) {
            condition=USET_SPAN_SIMPLE;
        } else {
            const UNormalizationCheckResult spanResult=
                norm2.quickCheck(s.tempSubStringBetween(start, limit), errorCode);
            if(U_FAILURE(errorCode) || spanResult==UNORM_NO) {
                return spanResult;
            }
            if(spanResult==UNORM_MAYBE) {
                overall=spanResult;
            }
            condition=USET_SPAN_NOT_CONTAINED;
        }
        start=limit;
    }
    return overall;
}
|
||||
|
||||
// Returns the end index of the leading part of s for which every in-filter span
// passes norm2.spanQuickCheckYes() completely; out-of-filter spans always pass.
// Returns 0 when errorCode already signals failure, s.length() if all of s passes.
int32_t
FilteredNormalizer2::spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const {
    uprv_checkCanGetBuffer(s, errorCode);
    if(U_FAILURE(errorCode)) {
        return 0;
    }
    USetSpanCondition condition=USET_SPAN_SIMPLE;
    int32_t start=0;
    while(start<s.length()) {
        const int32_t limit=set.span(s, start, condition);
        if(condition==USET_SPAN_NOT_CONTAINED) {
            condition=USET_SPAN_SIMPLE;
        } else {
            // norm2 answers relative to the substring, so rebase onto s.
            const int32_t relativeYes=
                norm2.spanQuickCheckYes(s.tempSubStringBetween(start, limit), errorCode);
            const int32_t yesLimit=start+relativeYes;
            if(U_FAILURE(errorCode) || yesLimit<limit) {
                return yesLimit;
            }
            condition=USET_SPAN_NOT_CONTAINED;
        }
        start=limit;
    }
    return s.length();
}
|
||||
|
||||
// Code points outside the filter set always report TRUE; all others defer to norm2.
UBool
FilteredNormalizer2::hasBoundaryBefore(UChar32 c) const {
    if(!set.contains(c)) {
        return TRUE;
    }
    return norm2.hasBoundaryBefore(c) ? TRUE : FALSE;
}
|
||||
|
||||
// Code points outside the filter set always report TRUE; all others defer to norm2.
UBool
FilteredNormalizer2::hasBoundaryAfter(UChar32 c) const {
    if(!set.contains(c)) {
        return TRUE;
    }
    return norm2.hasBoundaryAfter(c) ? TRUE : FALSE;
}
|
||||
|
||||
// Code points outside the filter set always report TRUE; all others defer to norm2.
UBool
FilteredNormalizer2::isInert(UChar32 c) const {
    if(!set.contains(c)) {
        return TRUE;
    }
    return norm2.isInert(c) ? TRUE : FALSE;
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
// C API ------------------------------------------------------------------- ***
|
||||
|
||||
U_NAMESPACE_USE
|
||||
|
||||
/**
 * C API: wraps norm2 in a new FilteredNormalizer2 restricted to filterSet.
 *
 * @param norm2      normalizer to wrap; must not be NULL
 * @param filterSet  set handed to the FilteredNormalizer2; must not be NULL
 * @param pErrorCode in/out error code. Set to U_ILLEGAL_ARGUMENT_ERROR when
 *                   norm2 or filterSet is NULL, U_MEMORY_ALLOCATION_ERROR when
 *                   the wrapper cannot be allocated.
 * @return the new wrapper, or NULL on failure
 */
U_CAPI UNormalizer2 * U_EXPORT2
unorm2_openFiltered(const UNormalizer2 *norm2, const USet *filterSet, UErrorCode *pErrorCode) {
    if(U_FAILURE(*pErrorCode)) {
        return NULL;
    }
    /* Both pointers are dereferenced below, so reject NULL for either one. */
    if(norm2==NULL || filterSet==NULL) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }
    Normalizer2 *fn2=new FilteredNormalizer2(*(Normalizer2 *)norm2,
                                             *UnicodeSet::fromUSet(filterSet));
    if(fn2==NULL) {
        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
    }
    return (UNormalizer2 *)fn2;
}
|
||||
|
||||
#endif // !UCONFIG_NO_NORMALIZATION
|
||||
212
UnicodeConverter/icubuilds-mac/icu/icu/common/hash.h
Normal file
212
UnicodeConverter/icubuilds-mac/icu/icu/common/hash.h
Normal file
@ -0,0 +1,212 @@
|
||||
/*
|
||||
******************************************************************************
|
||||
* Copyright (C) 1997-2014, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
******************************************************************************
|
||||
* Date Name Description
|
||||
* 03/28/00 aliu Creation.
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef HASH_H
|
||||
#define HASH_H
|
||||
|
||||
#include "unicode/unistr.h"
|
||||
#include "unicode/uobject.h"
|
||||
#include "cmemory.h"
|
||||
#include "uhash.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/**
|
||||
* Hashtable is a thin C++ wrapper around UHashtable, a general-purpose void*
|
||||
* hashtable implemented in C. Hashtable is designed to be idiomatic and
|
||||
* easy-to-use in C++.
|
||||
*
|
||||
* Hashtable is an INTERNAL CLASS.
|
||||
*/
|
||||
class U_COMMON_API Hashtable : public UMemory {
|
||||
UHashtable* hash;
|
||||
UHashtable hashObj;
|
||||
|
||||
inline void init(UHashFunction *keyHash, UKeyComparator *keyComp, UValueComparator *valueComp, UErrorCode& status);
|
||||
|
||||
public:
|
||||
/**
|
||||
* Construct a hashtable
|
||||
* @param ignoreKeyCase If true, keys are case insensitive.
|
||||
* @param status Error code
|
||||
*/
|
||||
Hashtable(UBool ignoreKeyCase, UErrorCode& status);
|
||||
|
||||
/**
|
||||
* Construct a hashtable
|
||||
* @param keyComp Comparator for comparing the keys
|
||||
* @param valueComp Comparator for comparing the values
|
||||
* @param status Error code
|
||||
*/
|
||||
Hashtable(UKeyComparator *keyComp, UValueComparator *valueComp, UErrorCode& status);
|
||||
|
||||
/**
|
||||
* Construct a hashtable
|
||||
* @param status Error code
|
||||
*/
|
||||
Hashtable(UErrorCode& status);
|
||||
|
||||
/**
|
||||
* Construct a hashtable, _disregarding any error_. Use this constructor
|
||||
* with caution.
|
||||
*/
|
||||
Hashtable();
|
||||
|
||||
/**
|
||||
* Non-virtual destructor; make this virtual if Hashtable is subclassed
|
||||
* in the future.
|
||||
*/
|
||||
~Hashtable();
|
||||
|
||||
UObjectDeleter *setValueDeleter(UObjectDeleter *fn);
|
||||
|
||||
int32_t count() const;
|
||||
|
||||
void* put(const UnicodeString& key, void* value, UErrorCode& status);
|
||||
|
||||
int32_t puti(const UnicodeString& key, int32_t value, UErrorCode& status);
|
||||
|
||||
void* get(const UnicodeString& key) const;
|
||||
|
||||
int32_t geti(const UnicodeString& key) const;
|
||||
|
||||
void* remove(const UnicodeString& key);
|
||||
|
||||
int32_t removei(const UnicodeString& key);
|
||||
|
||||
void removeAll(void);
|
||||
|
||||
const UHashElement* find(const UnicodeString& key) const;
|
||||
|
||||
/**
|
||||
* @param pos - must be UHASH_FIRST on first call, and untouched afterwards.
|
||||
* @see uhash_nextElement
|
||||
*/
|
||||
const UHashElement* nextElement(int32_t& pos) const;
|
||||
|
||||
UKeyComparator* setKeyComparator(UKeyComparator*keyComp);
|
||||
|
||||
UValueComparator* setValueComparator(UValueComparator* valueComp);
|
||||
|
||||
UBool equals(const Hashtable& that) const;
|
||||
private:
|
||||
Hashtable(const Hashtable &other); // forbid copying of this class
|
||||
Hashtable &operator=(const Hashtable &other); // forbid copying of this class
|
||||
};
|
||||
|
||||
/*********************************************************************
|
||||
* Implementation
|
||||
********************************************************************/
|
||||
|
||||
// Shared constructor helper. Does nothing if status already signals failure.
// On success `hash` points at the embedded hashObj and keys are deleted with
// uprv_deleteUObject; on failure `hash` keeps its previous value.
inline void Hashtable::init(UHashFunction *keyHash, UKeyComparator *keyComp,
                            UValueComparator *valueComp, UErrorCode& status) {
    if (U_FAILURE(status)) {
        return;
    }
    uhash_init(&hashObj, keyHash, keyComp, valueComp, &status);
    if (U_FAILURE(status)) {
        return;
    }
    hash = &hashObj;
    uhash_setKeyDeleter(hash, uprv_deleteUObject);
}
|
||||
|
||||
// Caller-supplied key and value comparators; keys are hashed with
// uhash_hashUnicodeString.
inline Hashtable::Hashtable(UKeyComparator *keyComp, UValueComparator *valueComp,
                            UErrorCode& status)
    : hash(0)
{
    init(uhash_hashUnicodeString, keyComp, valueComp, status);
}
|
||||
// Selects the caseless or the regular hash/compare pair depending on
// ignoreKeyCase; no value comparator is installed.
inline Hashtable::Hashtable(UBool ignoreKeyCase, UErrorCode& status)
    : hash(0)
{
    if (ignoreKeyCase) {
        init(uhash_hashCaselessUnicodeString, uhash_compareCaselessUnicodeString, NULL, status);
    } else {
        init(uhash_hashUnicodeString, uhash_compareUnicodeString, NULL, status);
    }
}
|
||||
|
||||
// Case-sensitive UnicodeString keys, no value comparator.
inline Hashtable::Hashtable(UErrorCode& status)
    : hash(0)
{
    init(uhash_hashUnicodeString,
         uhash_compareUnicodeString,
         NULL,
         status);
}
|
||||
|
||||
// Same setup as the status-taking constructor, but the error code is local and
// discarded, so a failed initialization is only visible as `hash` staying 0.
inline Hashtable::Hashtable()
    : hash(0)
{
    UErrorCode ignoredStatus = U_ZERO_ERROR;
    init(uhash_hashUnicodeString,
         uhash_compareUnicodeString,
         NULL,
         ignoredStatus);
}
|
||||
|
||||
// Closes the underlying table only if initialization succeeded.
inline Hashtable::~Hashtable() {
    if (hash == NULL) {
        return;
    }
    uhash_close(hash);
}
|
||||
|
||||
// Installs fn via uhash_setValueDeleter() and returns what that call returns.
inline UObjectDeleter *Hashtable::setValueDeleter(UObjectDeleter *fn) {
    UObjectDeleter *result = uhash_setValueDeleter(hash, fn);
    return result;
}
|
||||
|
||||
// Returns whatever uhash_count() reports for the wrapped table.
inline int32_t Hashtable::count() const {
    const int32_t entryCount = uhash_count(hash);
    return entryCount;
}
|
||||
|
||||
inline void* Hashtable::put(const UnicodeString& key, void* value, UErrorCode& status) {
|
||||
return uhash_put(hash, new UnicodeString(key), value, &status);
|
||||
}
|
||||
|
||||
// Integer-valued put: the table stores its own heap-allocated copy of key; the
// result of uhash_puti() is passed through.
inline int32_t Hashtable::puti(const UnicodeString& key, int32_t value, UErrorCode& status) {
    UnicodeString *keyCopy = new UnicodeString(key);
    return uhash_puti(hash, keyCopy, value, &status);
}
|
||||
|
||||
inline void* Hashtable::get(const UnicodeString& key) const {
|
||||
return uhash_get(hash, &key);
|
||||
}
|
||||
|
||||
// Looks key up with uhash_geti() and returns its result unchanged.
inline int32_t Hashtable::geti(const UnicodeString& key) const {
    const int32_t value = uhash_geti(hash, &key);
    return value;
}
|
||||
|
||||
inline void* Hashtable::remove(const UnicodeString& key) {
|
||||
return uhash_remove(hash, &key);
|
||||
}
|
||||
|
||||
// Removes key via uhash_removei() and returns that call's result.
inline int32_t Hashtable::removei(const UnicodeString& key) {
    const int32_t removed = uhash_removei(hash, &key);
    return removed;
}
|
||||
|
||||
// Returns the element uhash_find() yields for key.
inline const UHashElement* Hashtable::find(const UnicodeString& key) const {
    const UHashElement *element = uhash_find(hash, &key);
    return element;
}
|
||||
|
||||
// Iteration step: passes pos by address to uhash_nextElement(), which may update it.
inline const UHashElement* Hashtable::nextElement(int32_t& pos) const {
    const UHashElement *element = uhash_nextElement(hash, &pos);
    return element;
}
|
||||
|
||||
// Forwards to uhash_removeAll() on the wrapped table.
inline void Hashtable::removeAll(void)
{
    uhash_removeAll(hash);
}
|
||||
|
||||
// Installs keyComp via uhash_setKeyComparator() and returns that call's result.
inline UKeyComparator* Hashtable::setKeyComparator(UKeyComparator*keyComp) {
    UKeyComparator *result = uhash_setKeyComparator(hash, keyComp);
    return result;
}
|
||||
|
||||
// Installs valueComp via uhash_setValueComparator() and returns that call's result.
inline UValueComparator* Hashtable::setValueComparator(UValueComparator* valueComp) {
    UValueComparator *result = uhash_setValueComparator(hash, valueComp);
    return result;
}
|
||||
|
||||
// Compares the two wrapped tables with uhash_equals().
inline UBool Hashtable::equals(const Hashtable& that) const {
    const UHashtable *other = that.hash;
    return uhash_equals(hash, other);
}
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif
|
||||
|
||||
29
UnicodeConverter/icubuilds-mac/icu/icu/common/icudataver.c
Normal file
29
UnicodeConverter/icubuilds-mac/icu/icu/common/icudataver.c
Normal file
@ -0,0 +1,29 @@
|
||||
/*
|
||||
******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2009-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/icudataver.h"
|
||||
#include "unicode/ures.h"
|
||||
#include "uresimp.h" /* for ures_getVersionByKey */
|
||||
|
||||
/*
 * Fills dataVersionFillin with the version stored under U_ICU_DATA_KEY in the
 * U_ICU_VERSION_BUNDLE resource bundle.
 * Does nothing when *status already signals failure or dataVersionFillin is NULL.
 */
U_CAPI void U_EXPORT2 u_getDataVersion(UVersionInfo dataVersionFillin, UErrorCode *status) {
    UResourceBundle *bundle;

    if (U_FAILURE(*status)) {
        return;
    }
    if (dataVersionFillin == NULL) {
        return;
    }

    bundle = ures_openDirect(NULL, U_ICU_VERSION_BUNDLE, status);
    if (U_SUCCESS(*status)) {
        ures_getVersionByKey(bundle, U_ICU_DATA_KEY, dataVersionFillin, status);
    }
    /* Closed unconditionally, exactly as before, whether or not the open succeeded. */
    ures_close(bundle);
}
|
||||
882
UnicodeConverter/icubuilds-mac/icu/icu/common/icuplug.cpp
Normal file
882
UnicodeConverter/icubuilds-mac/icu/icu/common/icuplug.cpp
Normal file
@ -0,0 +1,882 @@
|
||||
/*
|
||||
******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2009-2015, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
******************************************************************************
|
||||
*
|
||||
* FILE NAME : icuplug.c
|
||||
*
|
||||
* Date Name Description
|
||||
* 10/29/2009 sl New.
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include "unicode/icuplug.h"
|
||||
|
||||
|
||||
#if UCONFIG_ENABLE_PLUGINS
|
||||
|
||||
|
||||
#include "icuplugimp.h"
|
||||
#include "cstring.h"
|
||||
#include "cmemory.h"
|
||||
#include "putilimp.h"
|
||||
#include "ucln.h"
|
||||
#include <stdio.h>
|
||||
#ifdef __MVS__ /* defined by z/OS compiler */
|
||||
#define _POSIX_SOURCE
|
||||
#include <cics.h> /* 12 Nov 2011 JAM iscics() function */
|
||||
#endif
|
||||
#include "charstr.h"
|
||||
|
||||
using namespace icu;
|
||||
|
||||
#ifndef UPLUG_TRACE
|
||||
#define UPLUG_TRACE 0
|
||||
#endif
|
||||
|
||||
#if UPLUG_TRACE
|
||||
#include <stdio.h>
|
||||
#define DBG(x) fprintf(stderr, "%s:%d: ",__FILE__,__LINE__); fprintf x
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Internal structure of an ICU plugin.
|
||||
*/
|
||||
|
||||
struct UPlugData {
|
||||
UPlugEntrypoint *entrypoint; /**< plugin entrypoint */
|
||||
uint32_t structSize; /**< initialized to the size of this structure */
|
||||
uint32_t token; /**< must be U_PLUG_TOKEN */
|
||||
void *lib; /**< plugin library, or NULL */
|
||||
char libName[UPLUG_NAME_MAX]; /**< library name */
|
||||
char sym[UPLUG_NAME_MAX]; /**< plugin symbol, or NULL */
|
||||
char config[UPLUG_NAME_MAX]; /**< configuration data */
|
||||
void *context; /**< user context data */
|
||||
char name[UPLUG_NAME_MAX]; /**< name of plugin */
|
||||
UPlugLevel level; /**< level of plugin */
|
||||
UBool awaitingLoad; /**< TRUE if the plugin is awaiting a load call */
|
||||
UBool dontUnload; /**< TRUE if plugin must stay resident (leak plugin and lib) */
|
||||
UErrorCode pluginStatus; /**< status code of plugin */
|
||||
};
|
||||
|
||||
|
||||
|
||||
#define UPLUG_LIBRARY_INITIAL_COUNT 8
|
||||
#define UPLUG_PLUGIN_INITIAL_COUNT 12
|
||||
|
||||
/**
 * Remove one item from a packed array by shifting every later item down one slot.
 * @param list the full list
 * @param listSize the number of entries in the list
 * @param memberSize the size of one member, in bytes
 * @param itemToRemove the item number of the member
 * @return the new listsize (unchanged if the list was already empty)
 */
static int32_t uplug_removeEntryAt(void *list, int32_t listSize, int32_t memberSize, int32_t itemToRemove) {
  uint8_t *bytePtr = (uint8_t *)list;

  /* get rid of some bad cases first */
  if(listSize<1) {
    return listSize;
  }

  /* is there anything to move? */
  if(listSize > itemToRemove+1) {
    /* Move ALL trailing members, not just the one right after the gap;
       otherwise removing from the middle duplicates an entry and drops the order. */
    const size_t trailingMembers = (size_t)(listSize - itemToRemove - 1);
    memmove(bytePtr + ((size_t)itemToRemove * (size_t)memberSize),
            bytePtr + ((size_t)(itemToRemove + 1) * (size_t)memberSize),
            trailingMembers * (size_t)memberSize);
  }

  return listSize-1;
}
|
||||
|
||||
|
||||
|
||||
|
||||
#if U_ENABLE_DYLOAD
|
||||
/**
|
||||
* Library management. Internal.
|
||||
* @internal
|
||||
*/
|
||||
struct UPlugLibrary;
|
||||
|
||||
/**
|
||||
* Library management. Internal.
|
||||
* @internal
|
||||
*/
|
||||
typedef struct UPlugLibrary {
|
||||
void *lib; /**< library ptr */
|
||||
char name[UPLUG_NAME_MAX]; /**< library name */
|
||||
uint32_t ref; /**< reference count */
|
||||
} UPlugLibrary;
|
||||
|
||||
static UPlugLibrary staticLibraryList[UPLUG_LIBRARY_INITIAL_COUNT];
|
||||
static UPlugLibrary * libraryList = staticLibraryList;
|
||||
static int32_t libraryCount = 0;
|
||||
static int32_t libraryMax = UPLUG_LIBRARY_INITIAL_COUNT;
|
||||
|
||||
/**
|
||||
* Search for a library. Doesn't lock
|
||||
* @param libName libname to search for
|
||||
* @return the library's struct
|
||||
*/
|
||||
static int32_t searchForLibraryName(const char *libName) {
|
||||
int32_t i;
|
||||
|
||||
for(i=0;i<libraryCount;i++) {
|
||||
if(!uprv_strcmp(libName, libraryList[i].name)) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
/**
 * Search the library table by handle.
 * @param lib library pointer to look for
 * @return index of the first entry whose lib equals the argument, or -1 if none
 */
static int32_t searchForLibrary(void *lib) {
  int32_t slot = 0;

  while(slot<libraryCount) {
    if(libraryList[slot].lib == lib) {
      return slot;
    }
    slot++;
  }
  return -1;
}
|
||||
|
||||
/**
 * Look up the recorded name of an opened library.
 * @param lib library pointer to look for
 * @param status in/out error code; set to U_MISSING_RESOURCE_ERROR if lib is not in the table
 * @return pointer to the name stored in the table, or NULL on failure
 */
U_INTERNAL char * U_EXPORT2
uplug_findLibrary(void *lib, UErrorCode *status) {
  int32_t slot;

  if(U_FAILURE(*status)) {
    return NULL;
  }
  slot = searchForLibrary(lib);
  if(slot == -1) {
    *status = U_MISSING_RESOURCE_ERROR;
    return NULL;
  }
  return libraryList[slot].name;
}
|
||||
|
||||
/**
 * Open a library by name, or add a reference to it if it is already open.
 *
 * @param libName name handed to uprv_dl_open(); names that do not fit into
 *                UPLUG_NAME_MAX-1 characters are stored truncated
 * @param status  in/out error code. U_MEMORY_ALLOCATION_ERROR when the
 *                statically allocated table is full; otherwise whatever
 *                uprv_dl_open() reports.
 * @return the library handle, or NULL on failure
 */
U_INTERNAL void * U_EXPORT2
uplug_openLibrary(const char *libName, UErrorCode *status) {
  int32_t libEntry = -1;
  void *lib = NULL;

  if(U_FAILURE(*status)) return NULL;

  libEntry = searchForLibraryName(libName);
  if(libEntry != -1) {
    /* Already open: hand out the same handle and count the new user. */
    libraryList[libEntry].ref++;
    return libraryList[libEntry].lib;
  }

  /* Check capacity BEFORE claiming a slot, so that a full table does not
     leave libraryCount pointing past the end of the array. */
  if(libraryCount >= libraryMax) {
    /* Ran out of library slots. Statically allocated because we can't depend on allocating memory.. */
    *status = U_MEMORY_ALLOCATION_ERROR;
#if UPLUG_TRACE
    DBG((stderr, "uplug_openLibrary() - out of library slots (max %d)\n", libraryMax));
#endif
    return NULL;
  }
  libEntry = libraryCount;

  /* Some operating systems don't want
     DL operations from multiple threads. */
  lib = uprv_dl_open(libName, status);
#if UPLUG_TRACE
  DBG((stderr, "uplug_openLibrary(%s,%s) libEntry %d, lib %p\n", libName, u_errorName(*status), libEntry, lib));
#endif

  if(lib == NULL || U_FAILURE(*status)) {
    /* cleanup: leave the unused slot blank; the count was never increased. */
    libraryList[libEntry].lib = NULL; /* failure with open */
    libraryList[libEntry].name[0] = 0;
#if UPLUG_TRACE
    DBG((stderr, "uplug_openLibrary(%s,%s) libEntry %d, lib %p\n", libName, u_errorName(*status), libEntry, lib));
#endif
    return NULL;
  }

  /* link it in */
  libraryList[libEntry].lib = lib;
  /* strncpy does not terminate when the source fills the buffer, so cap and terminate. */
  uprv_strncpy(libraryList[libEntry].name, libName, UPLUG_NAME_MAX-1);
  libraryList[libEntry].name[UPLUG_NAME_MAX-1] = 0;
  libraryList[libEntry].ref = 1;
  libraryCount++;
  return lib;
}
|
||||
|
||||
/**
 * Drop one reference to an opened library; when the count reaches zero the
 * library is closed and its table entry removed.
 *
 * @param lib    library pointer previously stored in the table
 * @param status in/out error code; set to U_INTERNAL_PROGRAM_ERROR if lib is not in the table
 */
U_INTERNAL void U_EXPORT2
uplug_closeLibrary(void *lib, UErrorCode *status) {
  int32_t slot;

#if UPLUG_TRACE
  DBG((stderr, "uplug_closeLibrary(%p,%s) list %p\n", lib, u_errorName(*status), (void*)libraryList));
#endif
  if(U_FAILURE(*status)) return;

  slot = searchForLibrary(lib);
  if(slot == -1) {
    *status = U_INTERNAL_PROGRAM_ERROR; /* could not find the entry! */
    return;
  }

  libraryList[slot].ref--;
  if(libraryList[slot].ref == 0) {
    uprv_dl_close(libraryList[slot].lib, status);
    libraryCount = uplug_removeEntryAt(libraryList, libraryCount, sizeof(*libraryList), slot);
  }
}
|
||||
|
||||
#endif
|
||||
|
||||
static UPlugData pluginList[UPLUG_PLUGIN_INITIAL_COUNT];
|
||||
static int32_t pluginCount = 0;
|
||||
|
||||
|
||||
|
||||
|
||||
/**
 * Convert a plugin pointer into its index in pluginList.
 * @param d pointer into pluginList
 * @return 0 if d is at or before the start of the table, pluginCount if d is
 *         at or past the first unused slot, otherwise the index of d
 */
static int32_t uplug_pluginNumber(UPlugData* d) {
  UPlugData *pastPlug = &pluginList[pluginCount];
  if(d<=pluginList) {
    return 0;
  } else if(d>=pastPlug) {
    return pluginCount;
  } else {
    /* Subtracting two UPlugData pointers already yields an element count;
       dividing by sizeof() again would collapse every index to 0. */
    return (int32_t)(d-pluginList);
  }
}
|
||||
|
||||
|
||||
/**
 * Iterate over the plugin table.
 * @param prior NULL to get the first entry, otherwise the value returned by
 *              the previous call
 * @return the entry after prior, or NULL when there are no more entries
 *         (including when the table is empty)
 */
U_CAPI UPlugData * U_EXPORT2
uplug_nextPlug(UPlugData *prior) {
  UPlugData *pastPlug = &pluginList[pluginCount];
  UPlugData *nextPlug;

  if(prior==NULL) {
    /* With no plugins registered, slot 0 is unused and must not be handed out. */
    return (pluginCount>0) ? pluginList : NULL;
  }

  nextPlug = &prior[1];
  if(nextPlug>=pastPlug) {
    return NULL;
  }
  return nextPlug;
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Call the plugin with some params
|
||||
*/
|
||||
static void uplug_callPlug(UPlugData *plug, UPlugReason reason, UErrorCode *status) {
|
||||
UPlugTokenReturn token;
|
||||
if(plug==NULL||U_FAILURE(*status)) {
|
||||
return;
|
||||
}
|
||||
token = (*(plug->entrypoint))(plug, reason, status);
|
||||
if(token!=UPLUG_TOKEN) {
|
||||
*status = U_INTERNAL_PROGRAM_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
 * Send UPLUG_REASON_UNLOAD to a plugin.
 * A plugin still awaiting its load call is an internal error; a plugin whose
 * pluginStatus signals failure is skipped silently.
 */
static void uplug_unloadPlug(UPlugData *plug, UErrorCode *status) {
  if(plug->awaitingLoad) {
    /* shouldn't happen. Plugin hasn't been loaded yet. */
    *status = U_INTERNAL_PROGRAM_ERROR;
    return;
  }
  /* Don't unload a plug which has a failing load status - means it didn't actually load. */
  if(U_FAILURE(plug->pluginStatus)) {
    return;
  }
  uplug_callPlug(plug, UPLUG_REASON_UNLOAD, status);
}
|
||||
|
||||
/**
 * Send UPLUG_REASON_QUERY to a freshly allocated plugin.
 * The level is preset to UPLUG_LEVEL_INVALID before the call; if the call
 * succeeds but the level is still invalid afterwards, the plugin is marked
 * with U_PLUGIN_DIDNT_SET_LEVEL. A failed call marks it with
 * U_INTERNAL_PROGRAM_ERROR. In both cases it stops awaiting a load.
 */
static void uplug_queryPlug(UPlugData *plug, UErrorCode *status) {
  /* Only a plugin that is awaiting load and has no level yet may be queried. */
  if(!(plug->awaitingLoad && plug->level == UPLUG_LEVEL_UNKNOWN)) {
    *status = U_INTERNAL_PROGRAM_ERROR;
    return;
  }

  plug->level = UPLUG_LEVEL_INVALID;
  uplug_callPlug(plug, UPLUG_REASON_QUERY, status);

  if(U_FAILURE(*status)) {
    plug->pluginStatus = U_INTERNAL_PROGRAM_ERROR;
    plug->awaitingLoad = FALSE;
  } else if(plug->level == UPLUG_LEVEL_INVALID) {
    plug->pluginStatus = U_PLUGIN_DIDNT_SET_LEVEL;
    plug->awaitingLoad = FALSE;
  }
}
|
||||
|
||||
|
||||
/**
 * Send UPLUG_REASON_LOAD to a plugin that has been queried successfully.
 * The plugin stops awaiting a load whether or not the call succeeds; a failed
 * call marks it with U_INTERNAL_PROGRAM_ERROR.
 */
static void uplug_loadPlug(UPlugData *plug, UErrorCode *status) {
  if(U_FAILURE(*status)) {
    return;
  }
  /* Must still be awaiting load and have a level of at least UPLUG_LEVEL_LOW. */
  if(!(plug->awaitingLoad && plug->level >= UPLUG_LEVEL_LOW)) {
    *status = U_INTERNAL_PROGRAM_ERROR;
    return;
  }

  uplug_callPlug(plug, UPLUG_REASON_LOAD, status);
  plug->awaitingLoad = FALSE;
  if(U_FAILURE(*status)) {
    plug->pluginStatus = U_INTERNAL_PROGRAM_ERROR;
  }
}
|
||||
|
||||
/**
 * Take the next free slot of pluginList and reset it to the empty state.
 * @param status in/out error code; set to U_MEMORY_ALLOCATION_ERROR when
 *               pluginCount has reached UPLUG_PLUGIN_INITIAL_COUNT
 * @return the reset slot, or NULL on failure
 */
static UPlugData *uplug_allocateEmptyPlug(UErrorCode *status)
{
    UPlugData *slot;

    if(U_FAILURE(*status)) {
        return NULL;
    }
    if(pluginCount == UPLUG_PLUGIN_INITIAL_COUNT) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }

    slot = &pluginList[pluginCount++];

    /* bookkeeping */
    slot->token = UPLUG_TOKEN;
    slot->structSize = sizeof(UPlugData);

    /* state */
    slot->level = UPLUG_LEVEL_UNKNOWN; /* initialize to null state */
    slot->awaitingLoad = TRUE;
    slot->dontUnload = FALSE;
    slot->pluginStatus = U_ZERO_ERROR;

    /* strings start out empty */
    slot->name[0] = 0;
    slot->libName[0] = 0;
    slot->config[0] = 0;
    slot->sym[0] = 0;

    /* no library or entrypoint attached yet */
    slot->lib = NULL;
    slot->entrypoint = NULL;

    return slot;
}
|
||||
|
||||
/**
 * Allocate a plug, record its configuration, symbol name, entrypoint and
 * library, then query it.
 * @param entrypoint plugin entrypoint function
 * @param config     configuration string, or NULL for none
 * @param lib        library handle stored on the plug (may be NULL)
 * @param symName    entrypoint symbol name, or NULL for none
 * @param status     in/out error code
 * @return the new plug, or NULL if no slot could be allocated
 */
static UPlugData *uplug_allocatePlug(UPlugEntrypoint *entrypoint, const char *config, void *lib, const char *symName,
                                     UErrorCode *status) {
    UPlugData *plug = uplug_allocateEmptyPlug(status);
    if(U_FAILURE(*status)) {
        return NULL;
    }

    if(config!=NULL) {
        uprv_strncpy(plug->config, config, UPLUG_NAME_MAX);
        /* strncpy does not terminate when the source fills the whole count */
        plug->config[UPLUG_NAME_MAX-1] = 0;
    } else {
        plug->config[0] = 0;
    }

    if(symName!=NULL) {
        uprv_strncpy(plug->sym, symName, UPLUG_NAME_MAX);
        plug->sym[UPLUG_NAME_MAX-1] = 0;
    } else {
        plug->sym[0] = 0;
    }

    plug->entrypoint = entrypoint;
    plug->lib = lib;
    uplug_queryPlug(plug, status);

    return plug;
}
|
||||
|
||||
/**
 * Release a plug's library and remove the plug from pluginList.
 * If *status (or closing the library) reports a failure, the entry is kept
 * in the list as a message, with its entrypoint cleared and marked as
 * not-unloadable.
 */
static void uplug_deallocatePlug(UPlugData *plug, UErrorCode *status) {
    UErrorCode closeStatus = U_ZERO_ERROR;
#if U_ENABLE_DYLOAD
    if(!plug->dontUnload) {
        uplug_closeLibrary(plug->lib, &closeStatus);
    }
#endif
    plug->lib = NULL;

    /* an earlier failure wins over a failure to close the library */
    if(U_SUCCESS(*status) && U_FAILURE(closeStatus)) {
        *status = closeStatus;
    }

    if(U_FAILURE(*status)) {
        /* not ok- leave as a message. */
        plug->awaitingLoad = FALSE;
        plug->entrypoint = 0;
        plug->dontUnload = TRUE;
        return;
    }

    /* all ok- remove: shift plugins up and decrement count. */
    pluginCount = uplug_removeEntryAt(pluginList, pluginCount, sizeof(plug[0]), uplug_pluginNumber(plug));
}
|
||||
|
||||
/**
 * Unload a plug and then deallocate it. A NULL plug is ignored.
 */
static void uplug_doUnloadPlug(UPlugData *plugToRemove, UErrorCode *status) {
    if(plugToRemove == NULL) {
        return;
    }
    uplug_unloadPlug(plugToRemove, status);
    uplug_deallocatePlug(plugToRemove, status);
}
|
||||
|
||||
/**
 * Remove a plug, but only if it is found while walking the plugin list.
 * A pointer that is not in the list is ignored.
 */
U_CAPI void U_EXPORT2
uplug_removePlug(UPlugData *plug, UErrorCode *status) {
    UPlugData *found = pluginList;
    if(U_FAILURE(*status)) return;

    /* Walk the list until we hit the requested plug or run off the end. */
    while(found!=NULL && found!=plug) {
        found = uplug_nextPlug(found);
    }

    /* found is either plug itself or NULL (which is a no-op below). */
    uplug_doUnloadPlug(found, status);
}
|
||||
|
||||
|
||||
|
||||
|
||||
/** Record whether the plug's library must be kept open on deallocation. */
U_CAPI void U_EXPORT2
uplug_setPlugNoUnload(UPlugData *data, UBool dontUnload)
{
    UPlugData *const plug = data;
    plug->dontUnload = dontUnload;
}
|
||||
|
||||
|
||||
/** Store the level on the plug. */
U_CAPI void U_EXPORT2
uplug_setPlugLevel(UPlugData *data, UPlugLevel level) {
    UPlugData *const plug = data;
    plug->level = level;
}
|
||||
|
||||
|
||||
/** Return the level currently stored on the plug. */
U_CAPI UPlugLevel U_EXPORT2
uplug_getPlugLevel(UPlugData *data) {
    const UPlugLevel level = data->level;
    return level;
}
|
||||
|
||||
|
||||
/**
 * Copy a name into the plug. At most UPLUG_NAME_MAX-1 characters are kept,
 * so the stored name is always NUL-terminated.
 * @param data plug to rename
 * @param name new name (NUL-terminated)
 */
U_CAPI void U_EXPORT2
uplug_setPlugName(UPlugData *data, const char *name) {
    uprv_strncpy(data->name, name, UPLUG_NAME_MAX);
    /* strncpy leaves the buffer unterminated when name fills the whole count */
    data->name[UPLUG_NAME_MAX-1] = 0;
}
|
||||
|
||||
|
||||
/** Return the name buffer stored inside the plug. */
U_CAPI const char * U_EXPORT2
uplug_getPlugName(UPlugData *data) {
    const char *name = data->name;
    return name;
}
|
||||
|
||||
|
||||
/** Return the entrypoint symbol name buffer stored inside the plug. */
U_CAPI const char * U_EXPORT2
uplug_getSymbolName(UPlugData *data) {
    const char *symbol = data->sym;
    return symbol;
}
|
||||
|
||||
/**
 * Return the library name of a plug.
 * The name stored on the plug is preferred. When it is empty, the name is
 * looked up from the library handle if dynamic loading is enabled;
 * otherwise NULL is returned.
 */
U_CAPI const char * U_EXPORT2
uplug_getLibraryName(UPlugData *data, UErrorCode *status) {
    if(data->libName[0] != 0) {
        return data->libName;
    }
#if U_ENABLE_DYLOAD
    return uplug_findLibrary(data->lib, status);
#else
    return NULL;
#endif
}
|
||||
|
||||
/** Return the library handle stored on the plug. */
U_CAPI void * U_EXPORT2
uplug_getLibrary(UPlugData *data) {
    void *library = data->lib;
    return library;
}
|
||||
|
||||
/** Return the context pointer stored on the plug. */
U_CAPI void * U_EXPORT2
uplug_getContext(UPlugData *data) {
    void *context = data->context;
    return context;
}
|
||||
|
||||
|
||||
/** Store a context pointer on the plug. */
U_CAPI void U_EXPORT2
uplug_setContext(UPlugData *data, void *context) {
    UPlugData *const plug = data;
    plug->context = context;
}
|
||||
|
||||
/** Return the configuration string buffer stored inside the plug. */
U_CAPI const char* U_EXPORT2
uplug_getConfiguration(UPlugData *data) {
    const char *configuration = data->config;
    return configuration;
}
|
||||
|
||||
/**
 * Return the n-th entry of pluginList, or NULL when n is outside
 * [0, pluginCount).
 */
U_INTERNAL UPlugData* U_EXPORT2
uplug_getPlugInternal(int32_t n) {
    const UBool inRange = (UBool)(n >= 0 && n < pluginCount);
    return inRange ? &(pluginList[n]) : NULL;
}
|
||||
|
||||
|
||||
/** Return the status code recorded on the plug. */
U_CAPI UErrorCode U_EXPORT2
uplug_getPlugLoadStatus(UPlugData *plug) {
    const UErrorCode loadStatus = plug->pluginStatus;
    return loadStatus;
}
|
||||
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Initialize a plugin fron an entrypoint and library - but don't load it.
|
||||
*/
|
||||
static UPlugData* uplug_initPlugFromEntrypointAndLibrary(UPlugEntrypoint *entrypoint, const char *config, void *lib, const char *sym,
|
||||
UErrorCode *status) {
|
||||
UPlugData *plug = NULL;
|
||||
|
||||
plug = uplug_allocatePlug(entrypoint, config, lib, sym, status);
|
||||
|
||||
if(U_SUCCESS(*status)) {
|
||||
return plug;
|
||||
} else {
|
||||
uplug_deallocatePlug(plug, status);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Initialize a plug from an entrypoint (no library, no symbol name) and
 * then load it.
 * @return the result of initialization (NULL if initialization failed)
 */
U_CAPI UPlugData* U_EXPORT2
uplug_loadPlugFromEntrypoint(UPlugEntrypoint *entrypoint, const char *config, UErrorCode *status) {
    UPlugData *const created =
        uplug_initPlugFromEntrypointAndLibrary(entrypoint, config, NULL, NULL, status);
    /* uplug_loadPlug returns immediately when *status is a failure */
    uplug_loadPlug(created, status);
    return created;
}
|
||||
|
||||
#if U_ENABLE_DYLOAD
|
||||
|
||||
/**
 * Allocate a placeholder plug that records a load failure.
 * The plug is marked as not awaiting load and not unloadable. Each string
 * is copied only when non-NULL and is always NUL-terminated (at most
 * UPLUG_NAME_MAX-1 characters are kept).
 * @param libName     library name, or NULL
 * @param sym         entrypoint symbol name, or NULL
 * @param config      configuration string, or NULL
 * @param nameOrError text stored as the plug's name, or NULL
 * @param loadStatus  status code recorded as the plug's pluginStatus
 * @param status      in/out error code for the allocation itself
 * @return the placeholder plug, or NULL if no slot could be allocated
 */
static UPlugData*
uplug_initErrorPlug(const char *libName, const char *sym, const char *config, const char *nameOrError, UErrorCode loadStatus, UErrorCode *status)
{
    UPlugData *plug = uplug_allocateEmptyPlug(status);
    if(U_FAILURE(*status)) return NULL;

    plug->pluginStatus = loadStatus;
    plug->awaitingLoad = FALSE; /* Won't load. */
    plug->dontUnload = TRUE; /* cannot unload. */

    /* strncpy leaves the buffer unterminated when the source fills the
       whole count, so terminate explicitly after every copy. */
    if(sym!=NULL) {
        uprv_strncpy(plug->sym, sym, UPLUG_NAME_MAX);
        plug->sym[UPLUG_NAME_MAX-1] = 0;
    }

    if(libName!=NULL) {
        uprv_strncpy(plug->libName, libName, UPLUG_NAME_MAX);
        plug->libName[UPLUG_NAME_MAX-1] = 0;
    }

    if(nameOrError!=NULL) {
        uprv_strncpy(plug->name, nameOrError, UPLUG_NAME_MAX);
        plug->name[UPLUG_NAME_MAX-1] = 0;
    }

    if(config!=NULL) {
        uprv_strncpy(plug->config, config, UPLUG_NAME_MAX);
        plug->config[UPLUG_NAME_MAX-1] = 0;
    }

    return plug;
}
|
||||
|
||||
/**
|
||||
* Fetch a plugin from DLL, and then initialize it from a library- but don't load it.
|
||||
*/
|
||||
static UPlugData*
|
||||
uplug_initPlugFromLibrary(const char *libName, const char *sym, const char *config, UErrorCode *status) {
|
||||
void *lib = NULL;
|
||||
UPlugData *plug = NULL;
|
||||
if(U_FAILURE(*status)) { return NULL; }
|
||||
lib = uplug_openLibrary(libName, status);
|
||||
if(lib!=NULL && U_SUCCESS(*status)) {
|
||||
UPlugEntrypoint *entrypoint = NULL;
|
||||
entrypoint = (UPlugEntrypoint*)uprv_dlsym_func(lib, sym, status);
|
||||
|
||||
if(entrypoint!=NULL&&U_SUCCESS(*status)) {
|
||||
plug = uplug_initPlugFromEntrypointAndLibrary(entrypoint, config, lib, sym, status);
|
||||
if(plug!=NULL&&U_SUCCESS(*status)) {
|
||||
plug->lib = lib; /* plug takes ownership of library */
|
||||
lib = NULL; /* library is now owned by plugin. */
|
||||
}
|
||||
} else {
|
||||
UErrorCode subStatus = U_ZERO_ERROR;
|
||||
plug = uplug_initErrorPlug(libName,sym,config,"ERROR: Could not load entrypoint",(lib==NULL)?U_MISSING_RESOURCE_ERROR:*status,&subStatus);
|
||||
}
|
||||
if(lib!=NULL) { /* still need to close the lib */
|
||||
UErrorCode subStatus = U_ZERO_ERROR;
|
||||
uplug_closeLibrary(lib, &subStatus); /* don't care here */
|
||||
}
|
||||
} else {
|
||||
UErrorCode subStatus = U_ZERO_ERROR;
|
||||
plug = uplug_initErrorPlug(libName,sym,config,"ERROR: could not load library",(lib==NULL)?U_MISSING_RESOURCE_ERROR:*status,&subStatus);
|
||||
}
|
||||
return plug;
|
||||
}
|
||||
|
||||
/**
 * Initialize a plug from a library and symbol name, then load it.
 * @return the plug produced by initialization, or NULL if *status was
 *         already a failure on entry
 */
U_CAPI UPlugData* U_EXPORT2
uplug_loadPlugFromLibrary(const char *libName, const char *sym, const char *config, UErrorCode *status) {
    UPlugData *created;
    if(U_FAILURE(*status)) { return NULL; }

    created = uplug_initPlugFromLibrary(libName, sym, config, status);
    /* uplug_loadPlug returns immediately when *status is a failure */
    uplug_loadPlug(created, status);
    return created;
}
|
||||
|
||||
#endif
|
||||
|
||||
static UPlugLevel gCurrentLevel = UPLUG_LEVEL_LOW;
|
||||
|
||||
/** Return the current global plugin level (gCurrentLevel). */
U_CAPI UPlugLevel U_EXPORT2 uplug_getCurrentLevel() {
    const UPlugLevel current = gCurrentLevel;
    return current;
}
|
||||
|
||||
/**
 * Library cleanup hook: unload and deallocate every plug in pluginList and
 * reset the global level to UPLUG_LEVEL_LOW.
 * @return always TRUE
 */
static UBool U_CALLCONV uplug_cleanup(void)
{
    int32_t i = 0;

    /* cleanup plugs */
    while(i<pluginCount) {
        UErrorCode subStatus = U_ZERO_ERROR;
        const int32_t countBefore = pluginCount;

        /* unload and deallocate */
        uplug_doUnloadPlug(&pluginList[i], &subStatus);

        /* A successful removal shifts the following entries down into slot i
           and shrinks pluginCount, so slot i must be visited again.
           Advancing unconditionally would skip every other plug. */
        if(pluginCount >= countBefore) {
            /* the entry stayed in the list (kept as a message): move on */
            i++;
        }
    }
    /* close other held libs? */
    gCurrentLevel = UPLUG_LEVEL_LOW;
    return TRUE;
}
|
||||
|
||||
#if U_ENABLE_DYLOAD
|
||||
|
||||
/**
 * Load every plug that is still awaiting load, in two passes:
 * first the UPLUG_LEVEL_LOW plugs, then everything that remains.
 * Per-plug load errors go to a local status and do not touch *status.
 */
static void uplug_loadWaitingPlugs(UErrorCode *status) {
    int32_t idx;
    UPlugLevel currentLevel = uplug_getCurrentLevel();

    if(U_FAILURE(*status)) {
        return;
    }
#if UPLUG_TRACE
    DBG((stderr,  "uplug_loadWaitingPlugs() Level: %d\n", currentLevel));
#endif

    /* pass #1: low level plugs */
    for(idx=0;idx<pluginCount;idx++) {
        UErrorCode subStatus = U_ZERO_ERROR;
        UPlugData *candidate = &pluginList[idx];

        if(!candidate->awaitingLoad || candidate->level != UPLUG_LEVEL_LOW) {
            continue;
        }
        if(currentLevel > UPLUG_LEVEL_LOW) {
            /* a low-level plug cannot load once the level has been raised */
            candidate->pluginStatus = U_PLUGIN_TOO_HIGH;
        } else {
            UPlugLevel newLevel;
            uplug_loadPlug(candidate, &subStatus);
            newLevel = uplug_getCurrentLevel();
            if(newLevel > currentLevel) {
                /* loading this plug raised the global level */
                candidate->pluginStatus = U_PLUGIN_CHANGED_LEVEL_WARNING;
                currentLevel = newLevel;
            }
        }
        candidate->awaitingLoad = FALSE;
    }

    /* pass #2: everything still waiting */
    for(idx=0;idx<pluginCount;idx++) {
        UErrorCode subStatus = U_ZERO_ERROR;
        UPlugData *candidate = &pluginList[idx];

        if(!candidate->awaitingLoad) {
            continue;
        }
        if(candidate->level == UPLUG_LEVEL_INVALID) {
            candidate->pluginStatus = U_PLUGIN_DIDNT_SET_LEVEL;
        } else if(candidate->level == UPLUG_LEVEL_UNKNOWN) {
            candidate->pluginStatus = U_INTERNAL_PROGRAM_ERROR;
        } else {
            uplug_loadPlug(candidate, &subStatus);
        }
        candidate->awaitingLoad = FALSE;
    }

#if UPLUG_TRACE
    DBG((stderr,  " Done Loading Plugs. Level: %d\n", (int32_t)uplug_getCurrentLevel()));
#endif
}
|
||||
|
||||
/* Name of the plugin config file */
|
||||
static char plugin_file[2048] = "";
|
||||
#endif
|
||||
|
||||
/**
 * Return the recorded plugin config file name when dynamic loading and
 * file I/O are both enabled; otherwise NULL.
 */
U_INTERNAL const char* U_EXPORT2
uplug_getPluginFile() {
    const char *fileName = NULL;
#if U_ENABLE_DYLOAD && !UCONFIG_NO_FILE_IO
    fileName = plugin_file;
#endif
    return fileName;
}
|
||||
|
||||
|
||||
// uplug_init() is called first thing from u_init().
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
uplug_init(UErrorCode *status) {
|
||||
#if !U_ENABLE_DYLOAD
|
||||
(void)status; /* unused */
|
||||
#elif !UCONFIG_NO_FILE_IO
|
||||
CharString plugin_dir;
|
||||
const char *env = getenv("ICU_PLUGINS");
|
||||
|
||||
if(U_FAILURE(*status)) return;
|
||||
if(env != NULL) {
|
||||
plugin_dir.append(env, -1, *status);
|
||||
}
|
||||
if(U_FAILURE(*status)) return;
|
||||
|
||||
#if defined(DEFAULT_ICU_PLUGINS)
|
||||
if(plugin_dir.isEmpty()) {
|
||||
plugin_dir.append(DEFAULT_ICU_PLUGINS, -1, *status);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if UPLUG_TRACE
|
||||
DBG((stderr, "ICU_PLUGINS=%s\n", plugin_dir.data()));
|
||||
#endif
|
||||
|
||||
if(!plugin_dir.isEmpty()) {
|
||||
FILE *f;
|
||||
|
||||
CharString pluginFile;
|
||||
#ifdef OS390BATCH
|
||||
/* There are potentially a lot of ways to implement a plugin directory on OS390/zOS */
|
||||
/* Keeping in mind that unauthorized file access is logged, monitored, and enforced */
|
||||
/* I've chosen to open a DDNAME if BATCH and leave it alone for (presumably) UNIX */
|
||||
/* System Services. Alternative techniques might be allocating a member in */
|
||||
/* SYS1.PARMLIB or setting an environment variable "ICU_PLUGIN_PATH" (?). The */
|
||||
/* DDNAME can be connected to a file in the HFS if need be. */
|
||||
|
||||
pluginFile.append("//DD:ICUPLUG", -1, *status); /* JAM 20 Oct 2011 */
|
||||
#else
|
||||
pluginFile.append(plugin_dir, *status);
|
||||
pluginFile.append(U_FILE_SEP_STRING, -1, *status);
|
||||
pluginFile.append("icuplugins", -1, *status);
|
||||
pluginFile.append(U_ICU_VERSION_SHORT, -1, *status);
|
||||
pluginFile.append(".txt", -1, *status);
|
||||
#endif
|
||||
|
||||
#if UPLUG_TRACE
|
||||
DBG((stderr, "status=%s\n", u_errorName(*status)));
|
||||
#endif
|
||||
|
||||
if(U_FAILURE(*status)) {
|
||||
return;
|
||||
}
|
||||
if((size_t)pluginFile.length() > (sizeof(plugin_file)-1)) {
|
||||
*status = U_BUFFER_OVERFLOW_ERROR;
|
||||
#if UPLUG_TRACE
|
||||
DBG((stderr, "status=%s\n", u_errorName(*status)));
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
|
||||
/* plugin_file is not used for processing - it is only used
|
||||
so that uplug_getPluginFile() works (i.e. icuinfo)
|
||||
*/
|
||||
uprv_strncpy(plugin_file, pluginFile.data(), sizeof(plugin_file));
|
||||
|
||||
#if UPLUG_TRACE
|
||||
DBG((stderr, "pluginfile= %s len %d/%d\n", plugin_file, (int)strlen(plugin_file), (int)sizeof(plugin_file)));
|
||||
#endif
|
||||
|
||||
#ifdef __MVS__
|
||||
if (iscics()) /* 12 Nov 2011 JAM */
|
||||
{
|
||||
f = NULL;
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
f = fopen(pluginFile.data(), "r");
|
||||
}
|
||||
|
||||
if(f != NULL) {
|
||||
char linebuf[1024];
|
||||
char *p, *libName=NULL, *symName=NULL, *config=NULL;
|
||||
int32_t line = 0;
|
||||
|
||||
|
||||
while(fgets(linebuf,1023,f)) {
|
||||
line++;
|
||||
|
||||
if(!*linebuf || *linebuf=='#') {
|
||||
continue;
|
||||
} else {
|
||||
p = linebuf;
|
||||
while(*p&&isspace((int)*p))
|
||||
p++;
|
||||
if(!*p || *p=='#') continue;
|
||||
libName = p;
|
||||
while(*p&&!isspace((int)*p)) {
|
||||
p++;
|
||||
}
|
||||
if(!*p || *p=='#') continue; /* no tab after libname */
|
||||
*p=0; /* end of libname */
|
||||
p++;
|
||||
while(*p&&isspace((int)*p)) {
|
||||
p++;
|
||||
}
|
||||
if(!*p||*p=='#') continue; /* no symname after libname +tab */
|
||||
symName = p;
|
||||
while(*p&&!isspace((int)*p)) {
|
||||
p++;
|
||||
}
|
||||
|
||||
if(*p) { /* has config */
|
||||
*p=0;
|
||||
++p;
|
||||
while(*p&&isspace((int)*p)) {
|
||||
p++;
|
||||
}
|
||||
if(*p) {
|
||||
config = p;
|
||||
}
|
||||
}
|
||||
|
||||
/* chop whitespace at the end of the config */
|
||||
if(config!=NULL&&*config!=0) {
|
||||
p = config+strlen(config);
|
||||
while(p>config&&isspace((int)*(--p))) {
|
||||
*p=0;
|
||||
}
|
||||
}
|
||||
|
||||
/* OK, we're good. */
|
||||
{
|
||||
UErrorCode subStatus = U_ZERO_ERROR;
|
||||
UPlugData *plug = uplug_initPlugFromLibrary(libName, symName, config, &subStatus);
|
||||
if(U_FAILURE(subStatus) && U_SUCCESS(*status)) {
|
||||
*status = subStatus;
|
||||
}
|
||||
#if UPLUG_TRACE
|
||||
DBG((stderr, "PLUGIN libName=[%s], sym=[%s], config=[%s]\n", libName, symName, config));
|
||||
DBG((stderr, " -> %p, %s\n", (void*)plug, u_errorName(subStatus)));
|
||||
#else
|
||||
(void)plug; /* unused */
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
fclose(f);
|
||||
} else {
|
||||
#if UPLUG_TRACE
|
||||
DBG((stderr, "Can't open plugin file %s\n", plugin_file));
|
||||
#endif
|
||||
}
|
||||
}
|
||||
uplug_loadWaitingPlugs(status);
|
||||
#endif /* U_ENABLE_DYLOAD */
|
||||
gCurrentLevel = UPLUG_LEVEL_HIGH;
|
||||
ucln_registerCleanup(UCLN_UPLUG, uplug_cleanup);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
91
UnicodeConverter/icubuilds-mac/icu/icu/common/icuplugimp.h
Normal file
91
UnicodeConverter/icubuilds-mac/icu/icu/common/icuplugimp.h
Normal file
@ -0,0 +1,91 @@
|
||||
/*
|
||||
******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2009-2015, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
******************************************************************************
|
||||
*
|
||||
* FILE NAME : icuplugimp.h
|
||||
*
|
||||
* Internal functions for the ICU plugin system
|
||||
*
|
||||
* Date Name Description
|
||||
* 10/29/2009 sl New.
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
|
||||
#ifndef ICUPLUGIMP_H
|
||||
#define ICUPLUGIMP_H
|
||||
|
||||
#include "unicode/icuplug.h"
|
||||
|
||||
#if UCONFIG_ENABLE_PLUGINS
|
||||
|
||||
/*========================*/
|
||||
/** @{ Library Manipulation
|
||||
*/
|
||||
|
||||
/**
|
||||
* Open a library, adding a reference count if needed.
|
||||
* @param libName library name to load
|
||||
* @param status error code
|
||||
* @return the library pointer, or NULL
|
||||
* @internal internal use only
|
||||
*/
|
||||
U_INTERNAL void * U_EXPORT2
|
||||
uplug_openLibrary(const char *libName, UErrorCode *status);
|
||||
|
||||
/**
|
||||
* Close a library, if its reference count is 0
|
||||
* @param lib the library to close
|
||||
* @param status error code
|
||||
* @internal internal use only
|
||||
*/
|
||||
U_INTERNAL void U_EXPORT2
|
||||
uplug_closeLibrary(void *lib, UErrorCode *status);
|
||||
|
||||
/**
|
||||
* Get a library's name, or NULL if not found.
|
||||
* @param lib the library pointer whose name is wanted
|
||||
* @param status error code
|
||||
* @return the library name, or NULL if not found.
|
||||
* @internal internal use only
|
||||
*/
|
||||
U_INTERNAL char * U_EXPORT2
|
||||
uplug_findLibrary(void *lib, UErrorCode *status);
|
||||
|
||||
/** @} */
|
||||
|
||||
/*========================*/
|
||||
/** @{ ICU Plugin internal interfaces
|
||||
*/
|
||||
|
||||
/**
|
||||
* Initialize the plugins
|
||||
* @param status error result
|
||||
* @internal - Internal use only.
|
||||
*/
|
||||
U_INTERNAL void U_EXPORT2
|
||||
uplug_init(UErrorCode *status);
|
||||
|
||||
/**
|
||||
* Get raw plug N
|
||||
* @internal - Internal use only
|
||||
*/
|
||||
U_INTERNAL UPlugData* U_EXPORT2
|
||||
uplug_getPlugInternal(int32_t n);
|
||||
|
||||
/**
|
||||
* Get the name of the plugin file.
|
||||
* @internal - Internal use only.
|
||||
*/
|
||||
U_INTERNAL const char* U_EXPORT2
|
||||
uplug_getPluginFile(void);
|
||||
|
||||
/** @} */
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
364
UnicodeConverter/icubuilds-mac/icu/icu/common/listformatter.cpp
Normal file
364
UnicodeConverter/icubuilds-mac/icu/icu/common/listformatter.cpp
Normal file
@ -0,0 +1,364 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2013-2014, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: listformatter.cpp
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2012aug27
|
||||
* created by: Umesh P. Nair
|
||||
*/
|
||||
|
||||
#include "unicode/listformatter.h"
|
||||
#include "simplepatternformatter.h"
|
||||
#include "mutex.h"
|
||||
#include "hash.h"
|
||||
#include "cstring.h"
|
||||
#include "ulocimp.h"
|
||||
#include "charstr.h"
|
||||
#include "ucln_cmn.h"
|
||||
#include "uresimp.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
struct ListFormatInternal : public UMemory {
|
||||
SimplePatternFormatter twoPattern;
|
||||
SimplePatternFormatter startPattern;
|
||||
SimplePatternFormatter middlePattern;
|
||||
SimplePatternFormatter endPattern;
|
||||
|
||||
ListFormatInternal(
|
||||
const UnicodeString& two,
|
||||
const UnicodeString& start,
|
||||
const UnicodeString& middle,
|
||||
const UnicodeString& end) :
|
||||
twoPattern(two),
|
||||
startPattern(start),
|
||||
middlePattern(middle),
|
||||
endPattern(end) {}
|
||||
|
||||
ListFormatInternal(const ListFormatData &data) :
|
||||
twoPattern(data.twoPattern),
|
||||
startPattern(data.startPattern),
|
||||
middlePattern(data.middlePattern),
|
||||
endPattern(data.endPattern) { }
|
||||
|
||||
ListFormatInternal(const ListFormatInternal &other) :
|
||||
twoPattern(other.twoPattern),
|
||||
startPattern(other.startPattern),
|
||||
middlePattern(other.middlePattern),
|
||||
endPattern(other.endPattern) { }
|
||||
};
|
||||
|
||||
|
||||
|
||||
static Hashtable* listPatternHash = NULL;
|
||||
static UMutex listFormatterMutex = U_MUTEX_INITIALIZER;
|
||||
static const char *STANDARD_STYLE = "standard";
|
||||
|
||||
U_CDECL_BEGIN
|
||||
/** Cleanup hook: destroy the pattern cache and reset its pointer. */
static UBool U_CALLCONV uprv_listformatter_cleanup() {
    Hashtable *doomed = listPatternHash;
    delete doomed;
    listPatternHash = NULL;
    return TRUE;
}
|
||||
|
||||
static void U_CALLCONV
|
||||
uprv_deleteListFormatInternal(void *obj) {
|
||||
delete static_cast<ListFormatInternal *>(obj);
|
||||
}
|
||||
|
||||
U_CDECL_END
|
||||
|
||||
static ListFormatInternal* loadListFormatInternal(
|
||||
const Locale& locale,
|
||||
const char* style,
|
||||
UErrorCode& errorCode);
|
||||
|
||||
static void getStringByKey(
|
||||
const UResourceBundle* rb,
|
||||
const char* key,
|
||||
UnicodeString& result,
|
||||
UErrorCode& errorCode);
|
||||
|
||||
// Copy constructor. If the source owns its patterns they are cloned and the
// clone becomes this object's data; otherwise the data pointer is shared.
ListFormatter::ListFormatter(const ListFormatter& other) :
        owned(NULL), data(other.data) {
    if (other.owned == NULL) {
        return;
    }
    owned = new ListFormatInternal(*other.owned);
    data = owned;
}
|
||||
|
||||
// Assignment. Releases this object's own patterns, then either clones the
// source's owned patterns or shares the source's data pointer.
ListFormatter& ListFormatter::operator=(const ListFormatter& other) {
    if (this != &other) {
        delete owned;
        if (other.owned == NULL) {
            owned = NULL;
            data = other.data;
        } else {
            owned = new ListFormatInternal(*other.owned);
            data = owned;
        }
    }
    return *this;
}
|
||||
|
||||
// Creates the pattern cache, installs its value deleter and registers the
// cleanup hook. Does nothing if errorCode is already a failure.
void ListFormatter::initializeHash(UErrorCode& errorCode) {
    if (U_FAILURE(errorCode)) {
        return;
    }

    Hashtable *cache = new Hashtable();
    listPatternHash = cache;
    if (cache == NULL) {
        errorCode = U_MEMORY_ALLOCATION_ERROR;
        return;
    }

    cache->setValueDeleter(uprv_deleteListFormatInternal);
    ucln_common_registerCleanup(UCLN_COMMON_LIST_FORMATTER, uprv_listformatter_cleanup);
}
|
||||
|
||||
// Returns the cached patterns for (locale, style), loading and caching them
// on first use. The cache is only touched while listFormatterMutex is held;
// loading happens outside the lock, so the cache is checked again before
// inserting.
const ListFormatInternal* ListFormatter::getListFormatInternal(
        const Locale& locale, const char *style, UErrorCode& errorCode) {
    if (U_FAILURE(errorCode)) {
        return NULL;
    }

    // Cache key: "<locale name>:<style>".
    CharString keyChars(locale.getName(), errorCode);
    keyChars.append(':', errorCode).append(style, errorCode);
    UnicodeString cacheKey(keyChars.data(), -1, US_INV);

    ListFormatInternal* cached = NULL;
    {
        Mutex lock(&listFormatterMutex);
        if (listPatternHash == NULL) {
            initializeHash(errorCode);
            if (U_FAILURE(errorCode)) {
                return NULL;
            }
        }
        cached = static_cast<ListFormatInternal*>(listPatternHash->get(cacheKey));
    }
    if (cached != NULL) {
        return cached;
    }

    ListFormatInternal* loaded = loadListFormatInternal(locale, style, errorCode);
    if (U_FAILURE(errorCode)) {
        return NULL;
    }

    Mutex lock(&listFormatterMutex);
    ListFormatInternal* raced = static_cast<ListFormatInternal*>(listPatternHash->get(cacheKey));
    if (raced != NULL) {
        // Someone else cached an entry while we were loading: use theirs.
        delete loaded;
        return raced;
    }
    listPatternHash->put(cacheKey, loaded, errorCode);
    if (U_FAILURE(errorCode)) {
        return NULL;
    }
    return loaded;
}
|
||||
|
||||
// Reads the "2", "start", "middle" and "end" patterns found under
// listPattern/<style> in the locale's resource bundle and wraps them in a
// new ListFormatInternal. Returns NULL with errorCode set on failure.
static ListFormatInternal* loadListFormatInternal(
        const Locale& locale, const char * style, UErrorCode& errorCode) {
    UResourceBundle* bundle = ures_open(NULL, locale.getName(), &errorCode);
    if (U_FAILURE(errorCode)) {
        ures_close(bundle);
        return NULL;
    }

    // Descend in place: the same bundle object is reused as the fill-in.
    bundle = ures_getByKeyWithFallback(bundle, "listPattern", bundle, &errorCode);
    bundle = ures_getByKeyWithFallback(bundle, style, bundle, &errorCode);
    if (U_FAILURE(errorCode)) {
        ures_close(bundle);
        return NULL;
    }

    UnicodeString two;
    UnicodeString start;
    UnicodeString middle;
    UnicodeString end;
    getStringByKey(bundle, "2", two, errorCode);
    getStringByKey(bundle, "start", start, errorCode);
    getStringByKey(bundle, "middle", middle, errorCode);
    getStringByKey(bundle, "end", end, errorCode);
    ures_close(bundle);
    if (U_FAILURE(errorCode)) {
        return NULL;
    }

    ListFormatInternal* patterns = new ListFormatInternal(two, start, middle, end);
    if (patterns == NULL) {
        errorCode = U_MEMORY_ALLOCATION_ERROR;
    }
    return patterns;
}
|
||||
|
||||
static void getStringByKey(const UResourceBundle* rb, const char* key, UnicodeString& result, UErrorCode& errorCode) {
|
||||
int32_t len;
|
||||
const UChar* ustr = ures_getStringByKeyWithFallback(rb, key, &len, &errorCode);
|
||||
if (U_FAILURE(errorCode)) {
|
||||
return;
|
||||
}
|
||||
result.setTo(ustr, len);
|
||||
}
|
||||
|
||||
// Creates a formatter for the default locale.
ListFormatter* ListFormatter::createInstance(UErrorCode& errorCode) {
    const Locale defaultLocale;  // A default-constructed Locale is the default locale.
    return createInstance(defaultLocale, errorCode);
}
|
||||
|
||||
// Creates a formatter for the given locale using the standard style.
ListFormatter* ListFormatter::createInstance(const Locale& locale, UErrorCode& errorCode) {
    const char *style = STANDARD_STYLE;
    return createInstance(locale, style, errorCode);
}
|
||||
|
||||
// Creates a formatter for the given locale and style. The patterns come
// from the shared cache, so the new formatter does not own them.
ListFormatter* ListFormatter::createInstance(const Locale& locale, const char *style, UErrorCode& errorCode) {
    Locale tempLocale = locale;
    const ListFormatInternal* patterns = getListFormatInternal(tempLocale, style, errorCode);
    if (U_FAILURE(errorCode)) {
        return NULL;
    }

    ListFormatter* formatter = new ListFormatter(patterns);
    if (formatter == NULL) {
        errorCode = U_MEMORY_ALLOCATION_ERROR;
    }
    return formatter;
}
|
||||
|
||||
// Builds a formatter that owns patterns compiled from listFormatData.
ListFormatter::ListFormatter(const ListFormatData& listFormatData) {
    ListFormatInternal *compiled = new ListFormatInternal(listFormatData);
    owned = compiled;
    data = compiled;
}
|
||||
|
||||
// Builds a formatter that borrows the given patterns; nothing is owned.
ListFormatter::ListFormatter(const ListFormatInternal* listFormatterInternal)
        : owned(NULL),
          data(listFormatterInternal) {
}
|
||||
|
||||
// Destroys the owned patterns, if any. Borrowed data is never deleted here.
ListFormatter::~ListFormatter() {
    ListFormatInternal *mine = owned;
    delete mine;
}
|
||||
|
||||
/**
|
||||
* Joins first and second using the pattern pat.
|
||||
* On entry offset is an offset into first or -1 if offset unspecified.
|
||||
* On exit offset is offset of second in result if recordOffset was set
|
||||
* Otherwise if it was >=0 it is set to point into result where it used
|
||||
* to point into first. On exit, result is the join of first and second
|
||||
* according to pat. Any previous value of result gets replaced.
|
||||
*/
|
||||
static void joinStringsAndReplace(
|
||||
const SimplePatternFormatter& pat,
|
||||
const UnicodeString& first,
|
||||
const UnicodeString& second,
|
||||
UnicodeString &result,
|
||||
UBool recordOffset,
|
||||
int32_t &offset,
|
||||
UErrorCode& errorCode) {
|
||||
if (U_FAILURE(errorCode)) {
|
||||
return;
|
||||
}
|
||||
const UnicodeString *params[2] = {&first, &second};
|
||||
int32_t offsets[2];
|
||||
pat.formatAndReplace(
|
||||
params,
|
||||
UPRV_LENGTHOF(params),
|
||||
result,
|
||||
offsets,
|
||||
UPRV_LENGTHOF(offsets),
|
||||
errorCode);
|
||||
if (U_FAILURE(errorCode)) {
|
||||
return;
|
||||
}
|
||||
if (offsets[0] == -1 || offsets[1] == -1) {
|
||||
errorCode = U_INVALID_FORMAT_ERROR;
|
||||
return;
|
||||
}
|
||||
if (recordOffset) {
|
||||
offset = offsets[1];
|
||||
} else if (offset >= 0) {
|
||||
offset += offsets[0];
|
||||
}
|
||||
}
|
||||
|
||||
// Formats the items and appends the result to appendTo, without tracking
// the offset of any particular item (index -1).
UnicodeString& ListFormatter::format(
        const UnicodeString items[],
        int32_t nItems,
        UnicodeString& appendTo,
        UErrorCode& errorCode) const {
    int32_t ignoredOffset;
    const int32_t noIndex = -1;
    return format(items, nItems, appendTo, noIndex, ignoredOffset, errorCode);
}
|
||||
|
||||
// Formats the items and appends the result to appendTo. On success, offset
// receives the position within appendTo of items[index], or stays -1 when
// index does not name an item.
UnicodeString& ListFormatter::format(
        const UnicodeString items[],
        int32_t nItems,
        UnicodeString& appendTo,
        int32_t index,
        int32_t &offset,
        UErrorCode& errorCode) const {
    offset = -1;
    if (U_FAILURE(errorCode)) {
        return appendTo;
    }
    if (data == NULL) {
        errorCode = U_INVALID_STATE_ERROR;
        return appendTo;
    }
    if (nItems <= 0) {
        return appendTo;
    }
    if (nItems == 1) {
        // A single item is appended verbatim; no pattern is involved.
        if (index == 0) {
            offset = appendTo.length();
        }
        appendTo.append(items[0]);
        return appendTo;
    }

    // Build the list in a scratch string, joining one more item each step.
    UnicodeString joined(items[0]);
    if (index == 0) {
        offset = 0;
    }

    const SimplePatternFormatter& leadPattern =
            (nItems == 2) ? data->twoPattern : data->startPattern;
    joinStringsAndReplace(
            leadPattern, joined, items[1], joined,
            index == 1, offset, errorCode);

    if (nItems > 2) {
        const int32_t lastIndex = nItems - 1;
        for (int32_t i = 2; i < lastIndex; ++i) {
            joinStringsAndReplace(
                    data->middlePattern, joined, items[i], joined,
                    index == i, offset, errorCode);
        }
        joinStringsAndReplace(
                data->endPattern, joined, items[lastIndex], joined,
                index == lastIndex, offset, errorCode);
    }

    if (U_FAILURE(errorCode)) {
        return appendTo;
    }
    if (offset >= 0) {
        // Make the offset relative to appendTo rather than the scratch string.
        offset += appendTo.length();
    }
    appendTo += joined;
    return appendTo;
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
@ -0,0 +1,356 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2014, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
* loadednormalizer2impl.cpp
|
||||
*
|
||||
* created on: 2014sep03
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_NORMALIZATION
|
||||
|
||||
#include "unicode/udata.h"
|
||||
#include "unicode/localpointer.h"
|
||||
#include "unicode/normalizer2.h"
|
||||
#include "unicode/unistr.h"
|
||||
#include "unicode/unorm.h"
|
||||
#include "cstring.h"
|
||||
#include "mutex.h"
|
||||
#include "norm2allmodes.h"
|
||||
#include "normalizer2impl.h"
|
||||
#include "uassert.h"
|
||||
#include "ucln_cmn.h"
|
||||
#include "uhash.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
class LoadedNormalizer2Impl : public Normalizer2Impl {
|
||||
public:
|
||||
LoadedNormalizer2Impl() : memory(NULL), ownedTrie(NULL) {}
|
||||
virtual ~LoadedNormalizer2Impl();
|
||||
|
||||
void load(const char *packageName, const char *name, UErrorCode &errorCode);
|
||||
|
||||
private:
|
||||
static UBool U_CALLCONV
|
||||
isAcceptable(void *context, const char *type, const char *name, const UDataInfo *pInfo);
|
||||
|
||||
UDataMemory *memory;
|
||||
UTrie2 *ownedTrie;
|
||||
};
|
||||
|
||||
// Closes the data memory first, then the trie.
LoadedNormalizer2Impl::~LoadedNormalizer2Impl() {
    UDataMemory *openedMemory = memory;
    UTrie2 *trie = ownedTrie;
    udata_close(openedMemory);
    utrie2_close(trie);
}
|
||||
|
||||
// Accepts a data item only if its header is large enough, matches this
// platform's endianness and charset family, carries the "Nrm2" data format
// and has format version 2.
UBool U_CALLCONV
LoadedNormalizer2Impl::isAcceptable(void * /*context*/,
                                    const char * /* type */, const char * /*name*/,
                                    const UDataInfo *pInfo) {
    if(pInfo->size<20) {
        return FALSE;
    }
    if(pInfo->isBigEndian!=U_IS_BIG_ENDIAN || pInfo->charsetFamily!=U_CHARSET_FAMILY) {
        return FALSE;
    }
    /* dataFormat="Nrm2" */
    if(pInfo->dataFormat[0]!=0x4e ||
       pInfo->dataFormat[1]!=0x72 ||
       pInfo->dataFormat[2]!=0x6d ||
       pInfo->dataFormat[3]!=0x32) {
        return FALSE;
    }
    if(pInfo->formatVersion[0]!=2) {
        return FALSE;
    }
    // Disabled in the original: copying pInfo->dataVersion into the instance.
    // Normalizer2Impl *me=(Normalizer2Impl *)context;
    // uprv_memcpy(me->dataVersion, pInfo->dataVersion, 4);
    return TRUE;
}
|
||||
|
||||
void
|
||||
LoadedNormalizer2Impl::load(const char *packageName, const char *name, UErrorCode &errorCode) {
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return;
|
||||
}
|
||||
memory=udata_openChoice(packageName, "nrm", name, isAcceptable, this, &errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return;
|
||||
}
|
||||
const uint8_t *inBytes=(const uint8_t *)udata_getMemory(memory);
|
||||
const int32_t *inIndexes=(const int32_t *)inBytes;
|
||||
int32_t indexesLength=inIndexes[IX_NORM_TRIE_OFFSET]/4;
|
||||
if(indexesLength<=IX_MIN_MAYBE_YES) {
|
||||
errorCode=U_INVALID_FORMAT_ERROR; // Not enough indexes.
|
||||
return;
|
||||
}
|
||||
|
||||
int32_t offset=inIndexes[IX_NORM_TRIE_OFFSET];
|
||||
int32_t nextOffset=inIndexes[IX_EXTRA_DATA_OFFSET];
|
||||
ownedTrie=utrie2_openFromSerialized(UTRIE2_16_VALUE_BITS,
|
||||
inBytes+offset, nextOffset-offset, NULL,
|
||||
&errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return;
|
||||
}
|
||||
|
||||
offset=nextOffset;
|
||||
nextOffset=inIndexes[IX_SMALL_FCD_OFFSET];
|
||||
const uint16_t *inExtraData=(const uint16_t *)(inBytes+offset);
|
||||
|
||||
// smallFCD: new in formatVersion 2
|
||||
offset=nextOffset;
|
||||
const uint8_t *inSmallFCD=inBytes+offset;
|
||||
|
||||
init(inIndexes, ownedTrie, inExtraData, inSmallFCD);
|
||||
}
|
||||
|
||||
// instance cache ---------------------------------------------------------- ***
|
||||
|
||||
// Allocates a LoadedNormalizer2Impl, loads the named data item into it and
// passes it on to the createInstance(impl, errorCode) overload.
// Returns NULL without doing anything if errorCode is already a failure, and
// sets U_MEMORY_ALLOCATION_ERROR if the implementation cannot be allocated.
Norm2AllModes *
Norm2AllModes::createInstance(const char *packageName,
                              const char *name,
                              UErrorCode &errorCode) {
    if(U_SUCCESS(errorCode)) {
        LoadedNormalizer2Impl *loaded=new LoadedNormalizer2Impl;
        if(loaded!=NULL) {
            loaded->load(packageName, name, errorCode);
            // The overload is called even if load() failed; it receives the
            // error state through errorCode.
            return createInstance(loaded, errorCode);
        }
        errorCode=U_MEMORY_ALLOCATION_ERROR;
    }
    return NULL;
}
|
||||
|
||||
U_CDECL_BEGIN
|
||||
static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup();
|
||||
U_CDECL_END
|
||||
|
||||
static Norm2AllModes *nfkcSingleton;
|
||||
static Norm2AllModes *nfkc_cfSingleton;
|
||||
static UHashtable *cache=NULL;
|
||||
|
||||
static icu::UInitOnce nfkcInitOnce = U_INITONCE_INITIALIZER;
|
||||
static icu::UInitOnce nfkc_cfInitOnce = U_INITONCE_INITIALIZER;
|
||||
|
||||
// UInitOnce singleton initialization function
|
||||
static void U_CALLCONV initSingletons(const char *what, UErrorCode &errorCode) {
|
||||
if (uprv_strcmp(what, "nfkc") == 0) {
|
||||
nfkcSingleton = Norm2AllModes::createInstance(NULL, "nfkc", errorCode);
|
||||
} else if (uprv_strcmp(what, "nfkc_cf") == 0) {
|
||||
nfkc_cfSingleton = Norm2AllModes::createInstance(NULL, "nfkc_cf", errorCode);
|
||||
} else {
|
||||
U_ASSERT(FALSE); // Unknown singleton
|
||||
}
|
||||
ucln_common_registerCleanup(UCLN_COMMON_LOADED_NORMALIZER2, uprv_loaded_normalizer2_cleanup);
|
||||
}
|
||||
|
||||
U_CDECL_BEGIN
|
||||
|
||||
// Value deleter installed on the instance cache: destroys one cached
// Norm2AllModes object.
static void U_CALLCONV deleteNorm2AllModes(void *allModes) {
    Norm2AllModes *cached = (Norm2AllModes *)allModes;
    delete cached;
}
|
||||
|
||||
// Library cleanup hook: destroys both singletons and the instance cache and
// resets the init-once guards so that everything can be created again.
// Always returns TRUE.
static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup() {
    // NFKC singleton and its guard
    delete nfkcSingleton;
    nfkcSingleton = NULL;
    nfkcInitOnce.reset();

    // NFKC_Casefold singleton and its guard
    delete nfkc_cfSingleton;
    nfkc_cfSingleton = NULL;
    nfkc_cfInitOnce.reset();

    // cache of instances created through Normalizer2::getInstance()
    uhash_close(cache);
    cache = NULL;

    return TRUE;
}
|
||||
|
||||
U_CDECL_END
|
||||
|
||||
// Returns the NFKC singleton, creating it on first use through the
// init-once guard. Returns NULL if errorCode is already a failure on entry.
const Norm2AllModes *
Norm2AllModes::getNFKCInstance(UErrorCode &errorCode) {
    if(U_SUCCESS(errorCode)) {
        umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode);
        return nfkcSingleton;
    }
    return NULL;
}
|
||||
|
||||
// Returns the NFKC_Casefold singleton, creating it on first use through the
// init-once guard. Returns NULL if errorCode is already a failure on entry.
const Norm2AllModes *
Norm2AllModes::getNFKC_CFInstance(UErrorCode &errorCode) {
    if(U_SUCCESS(errorCode)) {
        umtx_initOnce(nfkc_cfInitOnce, &initSingletons, "nfkc_cf", errorCode);
        return nfkc_cfSingleton;
    }
    return NULL;
}
|
||||
|
||||
// Returns the composing normalizer of the NFKC data, or NULL if the NFKC
// instance is unavailable.
const Normalizer2 *
Normalizer2::getNFKCInstance(UErrorCode &errorCode) {
    const Norm2AllModes *nfkc=Norm2AllModes::getNFKCInstance(errorCode);
    if(nfkc==NULL) {
        return NULL;
    }
    return &nfkc->comp;
}
|
||||
|
||||
// Returns the decomposing normalizer of the NFKC data (NFKD shares the NFKC
// data item), or NULL if the NFKC instance is unavailable.
const Normalizer2 *
Normalizer2::getNFKDInstance(UErrorCode &errorCode) {
    const Norm2AllModes *nfkc=Norm2AllModes::getNFKCInstance(errorCode);
    if(nfkc==NULL) {
        return NULL;
    }
    return &nfkc->decomp;
}
|
||||
|
||||
// Returns the composing normalizer of the NFKC_Casefold data, or NULL if
// that instance is unavailable.
const Normalizer2 *
Normalizer2::getNFKCCasefoldInstance(UErrorCode &errorCode) {
    const Norm2AllModes *nfkcCF=Norm2AllModes::getNFKC_CFInstance(errorCode);
    if(nfkcCF==NULL) {
        return NULL;
    }
    return &nfkcCF->comp;
}
|
||||
|
||||
// Returns the normalizer for the given data item and mode.
//
// With packageName==NULL the names "nfc", "nfkc" and "nfkc_cf" map to the
// built-in singletons. Everything else (and any name combined with a
// non-NULL package) is looked up in, and if necessary added to, a
// mutex-protected cache keyed by `name`.
//
// Returns NULL and sets U_ILLEGAL_ARGUMENT_ERROR for a NULL or empty name.
// Returns NULL on any other failure or for a mode not handled below.
const Normalizer2 *
Normalizer2::getInstance(const char *packageName,
                         const char *name,
                         UNormalization2Mode mode,
                         UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) {
        return NULL;
    }
    if(name==NULL || *name==0) {
        errorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }

    const Norm2AllModes *modes=NULL;

    // Built-in data items.
    if(packageName==NULL) {
        if(uprv_strcmp(name, "nfc")==0) {
            modes=Norm2AllModes::getNFCInstance(errorCode);
        } else if(uprv_strcmp(name, "nfkc")==0) {
            modes=Norm2AllModes::getNFKCInstance(errorCode);
        } else if(uprv_strcmp(name, "nfkc_cf")==0) {
            modes=Norm2AllModes::getNFKC_CFInstance(errorCode);
        }
    }

    if(modes==NULL && U_SUCCESS(errorCode)) {
        // First look: only read the cache, holding the mutex briefly.
        {
            Mutex lock;
            if(cache!=NULL) {
                modes=(Norm2AllModes *)uhash_get(cache, name);
            }
        }
        if(modes==NULL) {
            // Load outside the mutex; `created` deletes the object again
            // unless ownership is handed to the cache below.
            LocalPointer<Norm2AllModes> created(
                Norm2AllModes::createInstance(packageName, name, errorCode));
            if(U_SUCCESS(errorCode)) {
                Mutex lock;
                if(cache==NULL) {
                    cache=uhash_open(uhash_hashChars, uhash_compareChars, NULL, &errorCode);
                    if(U_FAILURE(errorCode)) {
                        return NULL;
                    }
                    uhash_setKeyDeleter(cache, uprv_free);
                    uhash_setValueDeleter(cache, deleteNorm2AllModes);
                }
                void *existing=uhash_get(cache, name);
                if(existing!=NULL) {
                    // race condition: another thread cached this name first;
                    // use its instance and let `created` be deleted.
                    modes=(Norm2AllModes *)existing;
                } else {
                    // The cache owns its keys, so store a private copy of the name.
                    int32_t keyLength=uprv_strlen(name)+1;
                    char *ownedKey=(char *)uprv_malloc(keyLength);
                    if(ownedKey==NULL) {
                        errorCode=U_MEMORY_ALLOCATION_ERROR;
                        return NULL;
                    }
                    uprv_memcpy(ownedKey, name, keyLength);
                    modes=created.getAlias();
                    uhash_put(cache, ownedKey, created.orphan(), &errorCode);
                }
            }
        }
    }

    if(modes==NULL || U_FAILURE(errorCode)) {
        return NULL;
    }
    switch(mode) {
    case UNORM2_COMPOSE:
        return &modes->comp;
    case UNORM2_DECOMPOSE:
        return &modes->decomp;
    case UNORM2_FCD:
        return &modes->fcd;
    case UNORM2_COMPOSE_CONTIGUOUS:
        return &modes->fcc;
    default:
        return NULL;  // unhandled mode
    }
}
|
||||
|
||||
// Maps a legacy UNormalizationMode to the corresponding Normalizer2 instance.
// Any mode without an explicit case (e.g. UNORM_NONE) yields the no-op
// normalizer. Returns NULL if errorCode is already a failure on entry.
const Normalizer2 *
Normalizer2Factory::getInstance(UNormalizationMode mode, UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) {
        return NULL;
    }
    const Normalizer2 *norm2;
    switch(mode) {
    case UNORM_NFC:
        norm2=Normalizer2::getNFCInstance(errorCode);
        break;
    case UNORM_NFD:
        norm2=Normalizer2::getNFDInstance(errorCode);
        break;
    case UNORM_NFKC:
        norm2=Normalizer2::getNFKCInstance(errorCode);
        break;
    case UNORM_NFKD:
        norm2=Normalizer2::getNFKDInstance(errorCode);
        break;
    case UNORM_FCD:
        norm2=getFCDInstance(errorCode);
        break;
    default:  // UNORM_NONE
        norm2=getNoopInstance(errorCode);
        break;
    }
    return norm2;
}
|
||||
|
||||
// Returns the implementation object behind the NFKC instance, or NULL if
// that instance is unavailable.
const Normalizer2Impl *
Normalizer2Factory::getNFKCImpl(UErrorCode &errorCode) {
    const Norm2AllModes *nfkc=Norm2AllModes::getNFKCInstance(errorCode);
    if(nfkc==NULL) {
        return NULL;
    }
    return nfkc->impl;
}
|
||||
|
||||
// Returns the implementation object behind the NFKC_Casefold instance, or
// NULL if that instance is unavailable.
const Normalizer2Impl *
Normalizer2Factory::getNFKC_CFImpl(UErrorCode &errorCode) {
    const Norm2AllModes *nfkcCF=Norm2AllModes::getNFKC_CFInstance(errorCode);
    if(nfkcCF==NULL) {
        return NULL;
    }
    return nfkcCF->impl;
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
// C API ------------------------------------------------------------------- ***
|
||||
|
||||
U_NAMESPACE_USE
|
||||
|
||||
// C wrapper around Normalizer2::getNFKCInstance(). pErrorCode is
// dereferenced without a NULL check.
U_CAPI const UNormalizer2 * U_EXPORT2
unorm2_getNFKCInstance(UErrorCode *pErrorCode) {
    const Normalizer2 *norm2=Normalizer2::getNFKCInstance(*pErrorCode);
    return (const UNormalizer2 *)norm2;
}
|
||||
|
||||
// C wrapper around Normalizer2::getNFKDInstance(). pErrorCode is
// dereferenced without a NULL check.
U_CAPI const UNormalizer2 * U_EXPORT2
unorm2_getNFKDInstance(UErrorCode *pErrorCode) {
    const Normalizer2 *norm2=Normalizer2::getNFKDInstance(*pErrorCode);
    return (const UNormalizer2 *)norm2;
}
|
||||
|
||||
// C wrapper around Normalizer2::getNFKCCasefoldInstance(). pErrorCode is
// dereferenced without a NULL check.
U_CAPI const UNormalizer2 * U_EXPORT2
unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode) {
    const Normalizer2 *norm2=Normalizer2::getNFKCCasefoldInstance(*pErrorCode);
    return (const UNormalizer2 *)norm2;
}
|
||||
|
||||
// C wrapper around Normalizer2::getInstance(); all arguments are forwarded
// unchanged. pErrorCode is dereferenced without a NULL check.
U_CAPI const UNormalizer2 * U_EXPORT2
unorm2_getInstance(const char *packageName,
                   const char *name,
                   UNormalization2Mode mode,
                   UErrorCode *pErrorCode) {
    const Normalizer2 *norm2=Normalizer2::getInstance(packageName, name, mode, *pErrorCode);
    return (const UNormalizer2 *)norm2;
}
|
||||
|
||||
// Quick-check value of code point c for a legacy normalization mode.
// Modes at or below UNORM_NONE and at or above UNORM_FCD always yield
// UNORM_YES; if the normalizer cannot be obtained the answer is UNORM_MAYBE.
U_CFUNC UNormalizationCheckResult
unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) {
    if(mode<=UNORM_NONE || UNORM_FCD<=mode) {
        return UNORM_YES;
    }
    UErrorCode errorCode=U_ZERO_ERROR;
    const Normalizer2 *norm2=Normalizer2Factory::getInstance(mode, errorCode);
    if(U_FAILURE(errorCode)) {
        return UNORM_MAYBE;
    }
    return ((const Normalizer2WithImpl *)norm2)->getQuickCheck(c);
}
|
||||
|
||||
#endif // !UCONFIG_NO_NORMALIZATION
|
||||
25
UnicodeConverter/icubuilds-mac/icu/icu/common/localsvc.h
Normal file
25
UnicodeConverter/icubuilds-mac/icu/icu/common/localsvc.h
Normal file
@ -0,0 +1,25 @@
|
||||
/*
|
||||
***************************************************************************
|
||||
* Copyright (C) 2006 International Business Machines Corporation *
|
||||
* and others. All rights reserved. *
|
||||
***************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef LOCALSVC_H
|
||||
#define LOCALSVC_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if U_LOCAL_SERVICE_HOOK
|
||||
/**
|
||||
* Prototype for user-supplied service hook. This function is expected to return
|
||||
* a type of factory object specific to the requested service.
|
||||
*
|
||||
* @param what service-specific string identifying the specific user hook
|
||||
* @param status error status
|
||||
* @return a service-specific hook, or NULL on failure.
|
||||
*/
|
||||
U_CAPI void* uprv_svc_hook(const char *what, UErrorCode *status);
|
||||
#endif
|
||||
|
||||
#endif
|
||||
179
UnicodeConverter/icubuilds-mac/icu/icu/common/locavailable.cpp
Normal file
179
UnicodeConverter/icubuilds-mac/icu/icu/common/locavailable.cpp
Normal file
@ -0,0 +1,179 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1997-2013, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: locavailable.cpp
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2010feb25
|
||||
* created by: Markus W. Scherer
|
||||
*
|
||||
* Code for available locales, separated out from other .cpp files
|
||||
* that then do not depend on resource bundle code and res_index bundles.
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/locid.h"
|
||||
#include "unicode/uloc.h"
|
||||
#include "unicode/ures.h"
|
||||
#include "cmemory.h"
|
||||
#include "ucln_cmn.h"
|
||||
#include "uassert.h"
|
||||
#include "umutex.h"
|
||||
#include "uresimp.h"
|
||||
|
||||
// C++ API ----------------------------------------------------------------- ***
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
static icu::Locale* availableLocaleList = NULL;
|
||||
static int32_t availableLocaleListCount;
|
||||
static icu::UInitOnce gInitOnce = U_INITONCE_INITIALIZER;
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
U_CDECL_BEGIN
|
||||
|
||||
// Library cleanup hook: frees the Locale array built by
// locale_available_init(), zeroes the count and resets the init-once guard.
// Always returns TRUE.
static UBool U_CALLCONV locale_available_cleanup(void)
{
    U_NAMESPACE_USE

    if (availableLocaleList != NULL) {
        delete []availableLocaleList;
        availableLocaleList = NULL;
    }
    availableLocaleListCount = 0;
    gInitOnce.reset();

    return TRUE;
}
|
||||
|
||||
U_CDECL_END
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
// Builds the array of available Locale objects from the C-level list
// (uloc_countAvailable() / uloc_getAvailable()) and registers the cleanup hook.
// This function is a friend of class Locale and is only invoked via
// umtx_initOnce().
void U_CALLCONV locale_available_init() {
    availableLocaleListCount = uloc_countAvailable();
    if(availableLocaleListCount != 0) {
        availableLocaleList = new Locale[availableLocaleListCount];
    }
    if (availableLocaleList == NULL) {
        // Nothing installed, or the allocation failed: expose an empty list.
        availableLocaleListCount = 0;
    }
    // Fill from the last entry down to the first.
    int32_t idx = availableLocaleListCount;
    while (idx-- > 0) {
        availableLocaleList[idx].setFromPOSIXID(uloc_getAvailable(idx));
    }
    ucln_common_registerCleanup(UCLN_COMMON_LOCALE_AVAILABLE, locale_available_cleanup);
}
|
||||
|
||||
// Returns the lazily built array of available locales and stores its length
// in `count`. The array is NULL (and count 0) when no locales are available.
const Locale* U_EXPORT2
Locale::getAvailableLocales(int32_t& count)
{
    umtx_initOnce(gInitOnce, &locale_available_init);

    const Locale *locales = availableLocaleList;
    count = availableLocaleListCount;
    return locales;
}
|
||||
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
// C API ------------------------------------------------------------------- ***
|
||||
|
||||
U_NAMESPACE_USE
|
||||
|
||||
/* ### Constants **************************************************/
|
||||
|
||||
/* These strings describe the resources we attempt to load from
|
||||
the locale ResourceBundle data file.*/
|
||||
static const char _kIndexLocaleName[] = "res_index";
|
||||
static const char _kIndexTag[] = "InstalledLocales";
|
||||
|
||||
static char** _installedLocales = NULL;
|
||||
static int32_t _installedLocalesCount = 0;
|
||||
static icu::UInitOnce _installedLocalesInitOnce;
|
||||
|
||||
/* ### Get available **************************************************/
|
||||
|
||||
// Library cleanup hook for the installed-locales list. If a list exists, it is
// detached from the globals, the count and init-once guard are reset, and the
// array is freed. Always returns TRUE.
static UBool U_CALLCONV uloc_cleanup(void) {
    if (_installedLocales == NULL) {
        return TRUE;   // nothing was loaded
    }

    char **detached = _installedLocales;
    _installedLocales = NULL;

    _installedLocalesCount = 0;
    _installedLocalesInitOnce.reset();

    uprv_free(detached);
    return TRUE;
}
|
||||
|
||||
// Load Installed Locales. This function will be called exactly once
|
||||
// via the initOnce mechanism.
|
||||
|
||||
static void U_CALLCONV loadInstalledLocales() {
|
||||
UResourceBundle *indexLocale = NULL;
|
||||
UResourceBundle installed;
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
int32_t i = 0;
|
||||
int32_t localeCount;
|
||||
|
||||
U_ASSERT(_installedLocales == NULL);
|
||||
U_ASSERT(_installedLocalesCount == 0);
|
||||
|
||||
_installedLocalesCount = 0;
|
||||
ures_initStackObject(&installed);
|
||||
indexLocale = ures_openDirect(NULL, _kIndexLocaleName, &status);
|
||||
ures_getByKey(indexLocale, _kIndexTag, &installed, &status);
|
||||
|
||||
if(U_SUCCESS(status)) {
|
||||
localeCount = ures_getSize(&installed);
|
||||
_installedLocales = (char **) uprv_malloc(sizeof(char*) * (localeCount+1));
|
||||
if (_installedLocales != NULL) {
|
||||
ures_resetIterator(&installed);
|
||||
while(ures_hasNext(&installed)) {
|
||||
ures_getNextString(&installed, NULL, (const char **)&_installedLocales[i++], &status);
|
||||
}
|
||||
_installedLocales[i] = NULL;
|
||||
_installedLocalesCount = localeCount;
|
||||
ucln_common_registerCleanup(UCLN_COMMON_ULOC, uloc_cleanup);
|
||||
}
|
||||
}
|
||||
ures_close(&installed);
|
||||
ures_close(indexLocale);
|
||||
}
|
||||
|
||||
static void _load_installedLocales()
|
||||
{
|
||||
umtx_initOnce(_installedLocalesInitOnce, &loadInstalledLocales);
|
||||
}
|
||||
|
||||
// Returns the ID of the installed locale at index `offset`, or NULL if the
// index is out of range.
//
// The index is validated against [0, count). This rejects negative offsets
// and also covers the case where loading failed: the list pointer is then
// NULL and the count 0, so no index may be dereferenced. (offset == count
// still yields NULL, as it did before via the list's NULL terminator.)
U_CAPI const char* U_EXPORT2
uloc_getAvailable(int32_t offset)
{
    _load_installedLocales();

    if (offset < 0 || offset >= _installedLocalesCount)
        return NULL;
    return _installedLocales[offset];
}
|
||||
|
||||
// Returns the number of installed locales, loading the list on first use.
U_CAPI int32_t U_EXPORT2
uloc_countAvailable()
{
    _load_installedLocales();
    const int32_t installedCount = _installedLocalesCount;
    return installedCount;
}
|
||||
|
||||
53
UnicodeConverter/icubuilds-mac/icu/icu/common/locbased.cpp
Normal file
53
UnicodeConverter/icubuilds-mac/icu/icu/common/locbased.cpp
Normal file
@ -0,0 +1,53 @@
|
||||
/*
|
||||
**********************************************************************
|
||||
* Copyright (c) 2004-2014, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* Author: Alan Liu
|
||||
* Created: January 16 2004
|
||||
* Since: ICU 2.8
|
||||
**********************************************************************
|
||||
*/
|
||||
#include "locbased.h"
|
||||
#include "cstring.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
// Returns the requested (valid or actual) locale as a Locale object.
// When getLocaleID() yields NULL, a Locale built from the empty string is
// returned instead.
Locale LocaleBased::getLocale(ULocDataLocaleType type, UErrorCode& status) const {
    const char* id = getLocaleID(type, status);
    if (id == 0) {
        id = "";
    }
    return Locale(id);
}
|
||||
|
||||
// Returns the aliased valid or actual locale ID, depending on `type`.
// Returns NULL if status is already a failure; any other `type` sets
// U_ILLEGAL_ARGUMENT_ERROR and returns NULL.
const char* LocaleBased::getLocaleID(ULocDataLocaleType type, UErrorCode& status) const {
    if (U_FAILURE(status)) {
        return NULL;
    }

    if (type == ULOC_VALID_LOCALE) {
        return valid;
    }
    if (type == ULOC_ACTUAL_LOCALE) {
        return actual;
    }
    status = U_ILLEGAL_ARGUMENT_ERROR;
    return NULL;
}
|
||||
|
||||
void LocaleBased::setLocaleIDs(const char* validID, const char* actualID) {
|
||||
if (validID != 0) {
|
||||
uprv_strncpy(valid, validID, ULOC_FULLNAME_CAPACITY);
|
||||
valid[ULOC_FULLNAME_CAPACITY-1] = 0; // always terminate
|
||||
}
|
||||
if (actualID != 0) {
|
||||
uprv_strncpy(actual, actualID, ULOC_FULLNAME_CAPACITY);
|
||||
actual[ULOC_FULLNAME_CAPACITY-1] = 0; // always terminate
|
||||
}
|
||||
}
|
||||
|
||||
// Copies the names of the given locales into the aliased valid/actual buffers.
//
// The buffers are documented as ULOC_FULLNAME_CAPACITY bytes long, while a
// Locale name may be longer, so an unbounded copy could write past their end.
// Delegating to the const char* overload bounds each copy and always
// NUL-terminates; names that fit are stored exactly as before.
void LocaleBased::setLocaleIDs(const Locale& validID, const Locale& actualID) {
    setLocaleIDs(validID.getName(), actualID.getName());
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
105
UnicodeConverter/icubuilds-mac/icu/icu/common/locbased.h
Normal file
105
UnicodeConverter/icubuilds-mac/icu/icu/common/locbased.h
Normal file
@ -0,0 +1,105 @@
|
||||
/*
|
||||
**********************************************************************
|
||||
* Copyright (c) 2004-2014, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* Author: Alan Liu
|
||||
* Created: January 16 2004
|
||||
* Since: ICU 2.8
|
||||
**********************************************************************
|
||||
*/
|
||||
#ifndef LOCBASED_H
|
||||
#define LOCBASED_H
|
||||
|
||||
#include "unicode/locid.h"
|
||||
#include "unicode/uobject.h"
|
||||
|
||||
/**
|
||||
* Macro to declare a locale LocaleBased wrapper object for the given
|
||||
* object, which must have two members named `validLocale' and
|
||||
* `actualLocale' of size ULOC_FULLNAME_CAPACITY
|
||||
*/
|
||||
#define U_LOCALE_BASED(varname, objname) \
|
||||
LocaleBased varname((objname).validLocale, (objname).actualLocale);
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/**
|
||||
* A utility class that unifies the implementation of getLocale() by
|
||||
* various ICU services. This class is likely to be removed in the
|
||||
* ICU 3.0 time frame in favor of an integrated approach with the
|
||||
* services framework.
|
||||
* @since ICU 2.8
|
||||
*/
|
||||
class U_COMMON_API LocaleBased : public UMemory {
|
||||
|
||||
public:
|
||||
|
||||
/**
|
||||
* Construct a LocaleBased wrapper around the two pointers. These
|
||||
* will be aliased for the lifetime of this object.
|
||||
*/
|
||||
inline LocaleBased(char* validAlias, char* actualAlias);
|
||||
|
||||
/**
|
||||
* Construct a LocaleBased wrapper around the two const pointers.
|
||||
* These will be aliased for the lifetime of this object.
|
||||
*/
|
||||
inline LocaleBased(const char* validAlias, const char* actualAlias);
|
||||
|
||||
/**
|
||||
* Return locale meta-data for the service object wrapped by this
|
||||
* object. Either the valid or the actual locale may be
|
||||
* retrieved.
|
||||
* @param type either ULOC_VALID_LOCALE or ULOC_ACTUAL_LOCALE
|
||||
* @param status input-output error code
|
||||
* @return the indicated locale
|
||||
*/
|
||||
Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const;
|
||||
|
||||
/**
|
||||
* Return the locale ID for the service object wrapped by this
|
||||
* object. Either the valid or the actual locale may be
|
||||
* retrieved.
|
||||
* @param type either ULOC_VALID_LOCALE or ULOC_ACTUAL_LOCALE
|
||||
* @param status input-output error code
|
||||
* @return the indicated locale ID
|
||||
*/
|
||||
const char* getLocaleID(ULocDataLocaleType type, UErrorCode& status) const;
|
||||
|
||||
/**
|
||||
* Set the locale meta-data for the service object wrapped by this
|
||||
* object. If either parameter is zero, it is ignored.
|
||||
* @param valid the ID of the valid locale
|
||||
* @param actual the ID of the actual locale
|
||||
*/
|
||||
void setLocaleIDs(const char* valid, const char* actual);
|
||||
|
||||
/**
|
||||
* Set the locale meta-data for the service object wrapped by this
|
||||
* object.
|
||||
* @param valid the ID of the valid locale
|
||||
* @param actual the ID of the actual locale
|
||||
*/
|
||||
void setLocaleIDs(const Locale& valid, const Locale& actual);
|
||||
|
||||
private:
|
||||
|
||||
char* valid;
|
||||
|
||||
char* actual;
|
||||
};
|
||||
|
||||
// Aliases the two writable buffers; nothing is copied.
inline LocaleBased::LocaleBased(char* validAlias, char* actualAlias)
    : valid(validAlias),
      actual(actualAlias)
{
}
|
||||
|
||||
// Aliases the two const buffers. Constness is cast away because the members
// are declared as plain char*.
inline LocaleBased::LocaleBased(const char* validAlias,
                                const char* actualAlias)
    : valid((char*)validAlias),
      actual((char*)actualAlias)
{
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif
|
||||
882
UnicodeConverter/icubuilds-mac/icu/icu/common/locdispnames.cpp
Normal file
882
UnicodeConverter/icubuilds-mac/icu/icu/common/locdispnames.cpp
Normal file
@ -0,0 +1,882 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1997-2013, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: locdispnames.cpp
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2010feb25
|
||||
* created by: Markus W. Scherer
|
||||
*
|
||||
* Code for locale display names, separated out from other .cpp files
|
||||
* that then do not depend on resource bundle code and display name data.
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/brkiter.h"
|
||||
#include "unicode/locid.h"
|
||||
#include "unicode/uloc.h"
|
||||
#include "unicode/ures.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "cmemory.h"
|
||||
#include "cstring.h"
|
||||
#include "putilimp.h"
|
||||
#include "ulocimp.h"
|
||||
#include "uresimp.h"
|
||||
#include "ureslocs.h"
|
||||
#include "ustr_imp.h"
|
||||
|
||||
// C++ API ----------------------------------------------------------------- ***
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
// Writes this locale's language name, localized for the default locale,
// into dispLang and returns it.
UnicodeString&
Locale::getDisplayLanguage(UnicodeString& dispLang) const
{
    const Locale &inLocale = getDefault();
    return getDisplayLanguage(inLocale, dispLang);
}
|
||||
|
||||
/* We cannot make any assumptions about the size of the output display strings.
 * Yet, since we are calling through to a C API, we need to set limits on the
 * buffer size. Each of the following getDisplay functions first asks the
 * result string for a buffer of ULOC_FULLNAME_CAPACITY UChars and lets the
 * C API fill it. If that is too small, it requests a buffer of exactly the
 * reported length and calls the C API a second time. */
|
||||
|
||||
// Writes this locale's language name, localized for displayLocale, into
// result and returns it. The C API is called on the string's own buffer,
// first with ULOC_FULLNAME_CAPACITY UChars and, on overflow, once more with
// the reported length. result is left empty if a buffer cannot be obtained
// or the lookup fails.
UnicodeString&
Locale::getDisplayLanguage(const Locale &displayLocale,
                           UnicodeString &result) const {
    UErrorCode status=U_ZERO_ERROR;

    UChar *dest=result.getBuffer(ULOC_FULLNAME_CAPACITY);
    if(dest==NULL) {
        result.truncate(0);
        return result;
    }
    int32_t needed=uloc_getDisplayLanguage(fullName, displayLocale.fullName,
                                           dest, result.getCapacity(),
                                           &status);
    result.releaseBuffer(U_SUCCESS(status) ? needed : 0);
    if(status!=U_BUFFER_OVERFLOW_ERROR) {
        return result;
    }

    // Second attempt with the exact length reported by the first call.
    dest=result.getBuffer(needed);
    if(dest==NULL) {
        result.truncate(0);
        return result;
    }
    status=U_ZERO_ERROR;
    needed=uloc_getDisplayLanguage(fullName, displayLocale.fullName,
                                   dest, result.getCapacity(),
                                   &status);
    result.releaseBuffer(U_SUCCESS(status) ? needed : 0);
    return result;
}
|
||||
|
||||
// Writes this locale's script name, localized for the default locale,
// into dispScript and returns it.
UnicodeString&
Locale::getDisplayScript(UnicodeString& dispScript) const
{
    const Locale &inLocale = getDefault();
    return getDisplayScript(inLocale, dispScript);
}
|
||||
|
||||
// Writes this locale's script name, localized for displayLocale, into
// result and returns it. The C API is called on the string's own buffer,
// first with ULOC_FULLNAME_CAPACITY UChars and, on overflow, once more with
// the reported length. result is left empty if a buffer cannot be obtained
// or the lookup fails.
UnicodeString&
Locale::getDisplayScript(const Locale &displayLocale,
                         UnicodeString &result) const {
    UErrorCode status=U_ZERO_ERROR;

    UChar *dest=result.getBuffer(ULOC_FULLNAME_CAPACITY);
    if(dest==NULL) {
        result.truncate(0);
        return result;
    }
    int32_t needed=uloc_getDisplayScript(fullName, displayLocale.fullName,
                                         dest, result.getCapacity(),
                                         &status);
    result.releaseBuffer(U_SUCCESS(status) ? needed : 0);
    if(status!=U_BUFFER_OVERFLOW_ERROR) {
        return result;
    }

    // Second attempt with the exact length reported by the first call.
    dest=result.getBuffer(needed);
    if(dest==NULL) {
        result.truncate(0);
        return result;
    }
    status=U_ZERO_ERROR;
    needed=uloc_getDisplayScript(fullName, displayLocale.fullName,
                                 dest, result.getCapacity(),
                                 &status);
    result.releaseBuffer(U_SUCCESS(status) ? needed : 0);
    return result;
}
|
||||
|
||||
// Writes this locale's country name, localized for the default locale,
// into dispCntry and returns it.
UnicodeString&
Locale::getDisplayCountry(UnicodeString& dispCntry) const
{
    const Locale &inLocale = getDefault();
    return getDisplayCountry(inLocale, dispCntry);
}
|
||||
|
||||
// Writes this locale's country name, localized for displayLocale, into
// result and returns it. The C API is called on the string's own buffer,
// first with ULOC_FULLNAME_CAPACITY UChars and, on overflow, once more with
// the reported length. result is left empty if a buffer cannot be obtained
// or the lookup fails.
UnicodeString&
Locale::getDisplayCountry(const Locale &displayLocale,
                          UnicodeString &result) const {
    UErrorCode status=U_ZERO_ERROR;

    UChar *dest=result.getBuffer(ULOC_FULLNAME_CAPACITY);
    if(dest==NULL) {
        result.truncate(0);
        return result;
    }
    int32_t needed=uloc_getDisplayCountry(fullName, displayLocale.fullName,
                                          dest, result.getCapacity(),
                                          &status);
    result.releaseBuffer(U_SUCCESS(status) ? needed : 0);
    if(status!=U_BUFFER_OVERFLOW_ERROR) {
        return result;
    }

    // Second attempt with the exact length reported by the first call.
    dest=result.getBuffer(needed);
    if(dest==NULL) {
        result.truncate(0);
        return result;
    }
    status=U_ZERO_ERROR;
    needed=uloc_getDisplayCountry(fullName, displayLocale.fullName,
                                  dest, result.getCapacity(),
                                  &status);
    result.releaseBuffer(U_SUCCESS(status) ? needed : 0);
    return result;
}
|
||||
|
||||
// Writes this locale's variant name, localized for the default locale,
// into dispVar and returns it.
UnicodeString&
Locale::getDisplayVariant(UnicodeString& dispVar) const
{
    const Locale &inLocale = getDefault();
    return getDisplayVariant(inLocale, dispVar);
}
|
||||
|
||||
// Writes this locale's variant name, localized for displayLocale, into
// result and returns it. The C API is called on the string's own buffer,
// first with ULOC_FULLNAME_CAPACITY UChars and, on overflow, once more with
// the reported length. result is left empty if a buffer cannot be obtained
// or the lookup fails.
UnicodeString&
Locale::getDisplayVariant(const Locale &displayLocale,
                          UnicodeString &result) const {
    UErrorCode status=U_ZERO_ERROR;

    UChar *dest=result.getBuffer(ULOC_FULLNAME_CAPACITY);
    if(dest==NULL) {
        result.truncate(0);
        return result;
    }
    int32_t needed=uloc_getDisplayVariant(fullName, displayLocale.fullName,
                                          dest, result.getCapacity(),
                                          &status);
    result.releaseBuffer(U_SUCCESS(status) ? needed : 0);
    if(status!=U_BUFFER_OVERFLOW_ERROR) {
        return result;
    }

    // Second attempt with the exact length reported by the first call.
    dest=result.getBuffer(needed);
    if(dest==NULL) {
        result.truncate(0);
        return result;
    }
    status=U_ZERO_ERROR;
    needed=uloc_getDisplayVariant(fullName, displayLocale.fullName,
                                  dest, result.getCapacity(),
                                  &status);
    result.releaseBuffer(U_SUCCESS(status) ? needed : 0);
    return result;
}
|
||||
|
||||
// Writes this locale's full display name, localized for the default locale,
// into name and returns it.
UnicodeString&
Locale::getDisplayName( UnicodeString& name ) const
{
    const Locale &inLocale = getDefault();
    return getDisplayName(inLocale, name);
}
|
||||
|
||||
// Writes this locale's full display name, localized for displayLocale, into
// result and returns it. The C API is called on the string's own buffer,
// first with ULOC_FULLNAME_CAPACITY UChars and, on overflow, once more with
// the reported length. result is left empty if a buffer cannot be obtained
// or the lookup fails.
UnicodeString&
Locale::getDisplayName(const Locale &displayLocale,
                       UnicodeString &result) const {
    UErrorCode status=U_ZERO_ERROR;

    UChar *dest=result.getBuffer(ULOC_FULLNAME_CAPACITY);
    if(dest==NULL) {
        result.truncate(0);
        return result;
    }
    int32_t needed=uloc_getDisplayName(fullName, displayLocale.fullName,
                                       dest, result.getCapacity(),
                                       &status);
    result.releaseBuffer(U_SUCCESS(status) ? needed : 0);
    if(status!=U_BUFFER_OVERFLOW_ERROR) {
        return result;
    }

    // Second attempt with the exact length reported by the first call.
    dest=result.getBuffer(needed);
    if(dest==NULL) {
        result.truncate(0);
        return result;
    }
    status=U_ZERO_ERROR;
    needed=uloc_getDisplayName(fullName, displayLocale.fullName,
                               dest, result.getCapacity(),
                               &status);
    result.releaseBuffer(U_SUCCESS(status) ? needed : 0);
    return result;
}
|
||||
|
||||
#if ! UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
// -------------------------------------
|
||||
// Gets the objectLocale display name in the default locale language.
|
||||
UnicodeString& U_EXPORT2
|
||||
BreakIterator::getDisplayName(const Locale& objectLocale,
|
||||
UnicodeString& name)
|
||||
{
|
||||
return objectLocale.getDisplayName(name);
|
||||
}
|
||||
|
||||
// -------------------------------------
|
||||
// Gets the objectLocale display name in the displayLocale language.
|
||||
UnicodeString& U_EXPORT2
|
||||
BreakIterator::getDisplayName(const Locale& objectLocale,
|
||||
const Locale& displayLocale,
|
||||
UnicodeString& name)
|
||||
{
|
||||
return objectLocale.getDisplayName(displayLocale, name);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
// C API ------------------------------------------------------------------- ***
|
||||
|
||||
U_NAMESPACE_USE
|
||||
|
||||
/* ### Constants **************************************************/
|
||||
|
||||
/* Names of the resources we attempt to load from the locale ResourceBundle
   data files. They are kept as distinct arrays on purpose:
   _getDisplayNameForComponent() recognises _kCountries by comparing the
   address of its `tag` argument, not the characters. */

/* Tables with the display names of the individual locale components. */
static const char _kLanguages[]            = "Languages";
static const char _kScripts[]              = "Scripts";
static const char _kScriptsStandAlone[]    = "Scripts%stand-alone";
static const char _kCountries[]            = "Countries";
static const char _kVariants[]             = "Variants";

/* Tables with the display names of keywords and of keyword values. */
static const char _kKeys[]                 = "Keys";
static const char _kTypes[]                = "Types";

/* The "currency" keyword and the table its values are looked up in. */
static const char _kCurrency[]             = "currency";
static const char _kCurrencies[]           = "Currencies";

/* Table and item keys of the pattern used to assemble a full display name. */
static const char _kLocaleDisplayPattern[] = "localeDisplayPattern";
static const char _kPattern[]              = "pattern";
static const char _kSeparator[]            = "separator";

//static const char _kRootName[]           = "root";
|
||||
|
||||
/* ### Display name **************************************************/
|
||||
|
||||
/*
 * Looks up a display string and copies it into dest; when the lookup fails,
 * the invariant-character `substitute` is converted and copied instead and
 * *pErrorCode is set to U_USING_DEFAULT_WARNING.
 *
 * itemKey == NULL: tableKey names a top-level string of the bundle opened
 *                  from path/locale.
 * itemKey != NULL: the string is fetched with
 *                  uloc_getTableStringWithFallback(tableKey, subTableKey, itemKey).
 *
 * At most destCapacity UChars are copied. The value returned is that of
 * u_terminateUChars() called with the full (untruncated) length.
 */
static int32_t
_getStringOrCopyKey(const char *path, const char *locale,
                    const char *tableKey,
                    const char* subTableKey,
                    const char *itemKey,
                    const char *substitute,
                    UChar *dest, int32_t destCapacity,
                    UErrorCode *pErrorCode) {
    const UChar *text = NULL;
    int32_t textLength = 0;

    if(itemKey != NULL) {
        /* A language code must not be a number; treat a numeric one as missing. */
        if(uprv_strncmp(tableKey, "Languages", 9) == 0 && uprv_strtol(itemKey, NULL, 10) != 0) {
            *pErrorCode = U_MISSING_RESOURCE_ERROR;
        } else {
            /* second-level item: table lookup with its special fallback */
            text = uloc_getTableStringWithFallback(path, locale,
                                                   tableKey,
                                                   subTableKey,
                                                   itemKey,
                                                   &textLength,
                                                   pErrorCode);
        }
    } else {
        /* top-level item: plain resource bundle access */
        UResourceBundle *bundle = ures_open(path, locale, pErrorCode);

        if(U_SUCCESS(*pErrorCode)) {
            text = ures_getStringByKey(bundle, tableKey, &textLength, pErrorCode);
            /* see comment about closing rb near "return item;" in _res_getTableStringWithFallback() */
            ures_close(bundle);
        }
    }

    if(U_FAILURE(*pErrorCode)) {
        /* nothing usable came from a resource bundle: fall back to the substitute */
        textLength = (int32_t)uprv_strlen(substitute);
        u_charsToUChars(substitute, dest, uprv_min(textLength, destCapacity));
        *pErrorCode = U_USING_DEFAULT_WARNING;
    } else {
        const int32_t copyLength = uprv_min(textLength, destCapacity);
        if(text != NULL && copyLength > 0) {
            u_memcpy(dest, text, copyLength);
        }
    }

    return u_terminateUChars(dest, destCapacity, textLength, pErrorCode);
}
|
||||
|
||||
/* Signature shared by uloc_getLanguage(), uloc_getScript(), uloc_getCountry()
   and uloc_getVariant(): extracts one component of a locale ID into a buffer. */
typedef  int32_t U_CALLCONV UDisplayNameGetter(const char *, char *, int32_t, UErrorCode *);

/*
 * Extracts one component of `locale` with `getter` and writes that
 * component's display name, taken from table `tag` of displayLocale, to dest.
 *
 * Returns 0 with U_ILLEGAL_ARGUMENT_ERROR for a bad dest/destCapacity pair or
 * when the getter fails or cannot NUL-terminate the component. An empty
 * component produces an empty, terminated result. Otherwise the result is
 * that of _getStringOrCopyKey(), with the raw component as the substitute.
 */
static int32_t
_getDisplayNameForComponent(const char *locale,
                            const char *displayLocale,
                            UChar *dest, int32_t destCapacity,
                            UDisplayNameGetter *getter,
                            const char *tag,
                            UErrorCode *pErrorCode) {
    char component[ULOC_FULLNAME_CAPACITY*4];

    /* argument checking */
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }
    if(destCapacity<0 || (dest==NULL && destCapacity>0)) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    /* the getter runs on its own status so that its outcome can be inspected */
    UErrorCode getterStatus = U_ZERO_ERROR;
    const int32_t componentLength = getter(locale, component, sizeof(component), &getterStatus);
    if(getterStatus==U_STRING_NOT_TERMINATED_WARNING || U_FAILURE(getterStatus)) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }
    if(componentLength==0) {
        return u_terminateUChars(dest, destCapacity, 0, pErrorCode);
    }

    /* Country names come from the region data, everything else from the
       language data. The tag is identified by address, not by content. */
    const char *bundlePath = U_ICUDATA_LANG;
    if(tag == _kCountries) {
        bundlePath = U_ICUDATA_REGION;
    }

    return _getStringOrCopyKey(bundlePath, displayLocale,
                               tag, NULL, component,
                               component,
                               dest, destCapacity,
                               pErrorCode);
}
|
||||
|
||||
/* Writes to dest the display name of the language component of `locale`,
   looked up in the "Languages" table of displayLocale. Argument checking and
   the meaning of the return value are those of _getDisplayNameForComponent(). */
U_CAPI int32_t U_EXPORT2
uloc_getDisplayLanguage(const char *locale,
                        const char *displayLocale,
                        UChar *dest, int32_t destCapacity,
                        UErrorCode *pErrorCode) {
    const int32_t nameLength =
        _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
                                    uloc_getLanguage, _kLanguages, pErrorCode);
    return nameLength;
}
|
||||
|
||||
/*
 * Writes to dest the display name of the script component of `locale`.
 * The stand-alone form ("Scripts%stand-alone") is preferred; when it is not
 * available the in-context form ("Scripts") is used instead.
 *
 * @param locale         locale ID whose script is to be named
 * @param displayLocale  locale whose language the name is returned in
 * @param dest           output buffer, may be NULL only if destCapacity is 0
 * @param destCapacity   capacity of dest in UChars
 * @param pErrorCode     in/out status; nothing is done if it is NULL or
 *                       already holds a failure
 * @return the length of the display name
 */
U_CAPI int32_t U_EXPORT2
uloc_getDisplayScript(const char* locale,
                      const char* displayLocale,
                      UChar *dest, int32_t destCapacity,
                      UErrorCode *pErrorCode)
{
    /* Honour the usual status contract: this function writes *pErrorCode
       below, so it must neither dereference NULL nor overwrite a failure
       handed in by the caller. */
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }

    UErrorCode err = U_ZERO_ERROR;
    int32_t res = _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
                uloc_getScript, _kScriptsStandAlone, &err);

    if ( destCapacity == 0 && err == U_BUFFER_OVERFLOW_ERROR ) {
        /* Preflight: terminating into a zero-capacity buffer replaces
           U_USING_DEFAULT_WARNING with U_BUFFER_OVERFLOW_ERROR, so we cannot
           tell whether the stand-alone name was found. Measure the fallback
           too and report the larger length so that a buffer of that size is
           sufficient for the real call. */
        int32_t fallbackRes = _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
                    uloc_getScript, _kScripts, pErrorCode);
        return (fallbackRes > res) ? fallbackRes : res;
    }

    if ( err == U_USING_DEFAULT_WARNING ) {
        /* no stand-alone name: use the in-context one */
        return _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
                    uloc_getScript, _kScripts, pErrorCode);
    } else {
        *pErrorCode = err;
        return res;
    }
}
|
||||
|
||||
/* Writes to dest the display name of the script component of `locale`, taken
   from the "Scripts" table of displayLocale (no stand-alone lookup).
   Argument checking and the meaning of the return value are those of
   _getDisplayNameForComponent(). */
U_INTERNAL int32_t U_EXPORT2
uloc_getDisplayScriptInContext(const char* locale,
                               const char* displayLocale,
                               UChar *dest, int32_t destCapacity,
                               UErrorCode *pErrorCode)
{
    const int32_t nameLength =
        _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
                                    uloc_getScript, _kScripts, pErrorCode);
    return nameLength;
}
|
||||
|
||||
/* Writes to dest the display name of the country component of `locale`,
   looked up in the "Countries" table of displayLocale. Argument checking and
   the meaning of the return value are those of _getDisplayNameForComponent(). */
U_CAPI int32_t U_EXPORT2
uloc_getDisplayCountry(const char *locale,
                       const char *displayLocale,
                       UChar *dest, int32_t destCapacity,
                       UErrorCode *pErrorCode) {
    const int32_t nameLength =
        _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
                                    uloc_getCountry, _kCountries, pErrorCode);
    return nameLength;
}
|
||||
|
||||
/*
|
||||
* TODO separate variant1_variant2_variant3...
|
||||
* by getting each tag's display string and concatenating them with ", "
|
||||
* in between - similar to uloc_getDisplayName()
|
||||
*/
|
||||
/* Writes to dest the display name of the variant component of `locale`,
   looked up in the "Variants" table of displayLocale. The variant is looked
   up as a single key. Argument checking and the meaning of the return value
   are those of _getDisplayNameForComponent(). */
U_CAPI int32_t U_EXPORT2
uloc_getDisplayVariant(const char *locale,
                       const char *displayLocale,
                       UChar *dest, int32_t destCapacity,
                       UErrorCode *pErrorCode) {
    const int32_t nameLength =
        _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
                                    uloc_getVariant, _kVariants, pErrorCode);
    return nameLength;
}
|
||||
|
||||
/* Instead of having a separate pass for 'special' patterns, reintegrate the two
|
||||
* so we don't get bitten by preflight bugs again. We can be reasonably efficient
|
||||
* without two separate code paths, this code isn't that performance-critical.
|
||||
*
|
||||
* This code is general enough to deal with patterns that have a prefix or swap the
|
||||
* language and remainder components, since we gave developers enough rope to do such
|
||||
* things if they futz with the pattern data. But since we don't give them a way to
|
||||
* specify a pattern for arbitrary combinations of components, there's not much use in
|
||||
* that. I don't think our data includes such patterns, the only variable I know of is
|
||||
* whether there is a space before the open paren, or not. Oh, and zh uses different
|
||||
* chars than the standard open/close paren (which ja and ko use, btw).
|
||||
*/
|
||||
/*
 * Builds the full display name of `locale` in the language of displayLocale,
 * in place in dest, following the "localeDisplayPattern" data of
 * displayLocale (default pattern "{0} ({1})", default separator "{0}, {1}").
 * {0} receives the language name; {1} receives script, country, variant and
 * keyword=value entries joined by the separator.
 *
 * @param locale         locale ID to be named
 * @param displayLocale  locale whose language and pattern are used
 * @param dest           output buffer, may be NULL only if destCapacity is 0
 * @param destCapacity   capacity of dest in UChars
 * @param pErrorCode     in/out status; nothing is done if it is NULL or
 *                       already holds a failure
 * @return the result of u_terminateUChars() for the full length of the
 *         formatted name, which may exceed destCapacity
 */
U_CAPI int32_t U_EXPORT2
uloc_getDisplayName(const char *locale,
                    const char *displayLocale,
                    UChar *dest, int32_t destCapacity,
                    UErrorCode *pErrorCode)
{
    static const UChar defaultSeparator[9] = { 0x007b, 0x0030, 0x007d, 0x002c, 0x0020, 0x007b, 0x0031, 0x007d, 0x0000 }; /* "{0}, {1}" */
    static const UChar sub0[4] = { 0x007b, 0x0030, 0x007d , 0x0000 } ; /* {0} */
    static const UChar sub1[4] = { 0x007b, 0x0031, 0x007d , 0x0000 } ; /* {1} */
    static const int32_t subLen = 3;
    static const UChar defaultPattern[10] = {
        0x007b, 0x0030, 0x007d, 0x0020, 0x0028, 0x007b, 0x0031, 0x007d, 0x0029, 0x0000
    }; /* {0} ({1}) */
    static const int32_t defaultPatLen = 9;
    static const int32_t defaultSub0Pos = 0;
    static const int32_t defaultSub1Pos = 5;

    int32_t length; /* of formatted result */

    const UChar *separator;
    int32_t sepLen = 0;
    const UChar *pattern;
    int32_t patLen = 0;
    int32_t sub0Pos, sub1Pos;

    UChar formatOpenParen         = 0x0028; // (
    UChar formatReplaceOpenParen  = 0x005B; // [
    UChar formatCloseParen        = 0x0029; // )
    UChar formatReplaceCloseParen = 0x005D; // ]

    UBool haveLang = TRUE; /* assume true, set false if we find we don't have
                              a lang component in the locale */
    UBool haveRest = TRUE; /* assume true, set false if we find we don't have
                              any other component in the locale */
    UBool retry = FALSE; /* set true if we need to retry, see below */

    int32_t langi = 0; /* index of the language substitution (0 or 1), virtually always 0 */

    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }

    if(destCapacity<0 || (destCapacity>0 && dest==NULL)) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    {
        /* Pattern data is optional: lookups run on a private status, and a
           missing item simply leaves its length at 0 (defaults apply below). */
        UErrorCode status = U_ZERO_ERROR;
        UResourceBundle* locbundle=ures_open(U_ICUDATA_LANG, displayLocale, &status);
        UResourceBundle* dspbundle=ures_getByKeyWithFallback(locbundle, _kLocaleDisplayPattern,
                                                             NULL, &status);

        separator=ures_getStringByKeyWithFallback(dspbundle, _kSeparator, &sepLen, &status);
        pattern=ures_getStringByKeyWithFallback(dspbundle, _kPattern, &patLen, &status);

        ures_close(dspbundle);
        ures_close(locbundle);
    }

    /* If we couldn't find any data, then use the defaults */
    if(sepLen == 0) {
       separator = defaultSeparator;
    }
    /* #10244: Even though separator is now a pattern, it is awkward to handle it as such
     * here since we are trying to build the display string in place in the dest buffer,
     * and to handle it as a pattern would entail having separate storage for the
     * substrings that need to be combined (the first of which may be the result of
     * previous such combinations). So for now we continue to treat the portion between
     * {0} and {1} as a string to be appended when joining substrings, ignoring anything
     * that is before {0} or after {1} (no existing separator pattern has any such thing).
     * This is similar to how pattern is handled below.
     */
    {
        UChar *p0=u_strstr(separator, sub0);
        UChar *p1=u_strstr(separator, sub1);
        if (p0==NULL || p1==NULL || p1<p0) {
            *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
            return 0;
        }
        separator = (const UChar *)p0 + subLen;
        sepLen = (int32_t)(p1 - separator);
    }

    if(patLen==0 || (patLen==defaultPatLen && !u_strncmp(pattern, defaultPattern, patLen))) {
        pattern=defaultPattern;
        patLen=defaultPatLen;
        sub0Pos=defaultSub0Pos;
        sub1Pos=defaultSub1Pos;
        // use default formatOpenParen etc. set above
    } else { /* non-default pattern */
        UChar *p0=u_strstr(pattern, sub0);
        UChar *p1=u_strstr(pattern, sub1);
        if (p0==NULL || p1==NULL) {
            *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
            return 0;
        }
        sub0Pos=(int32_t)(p0-pattern);
        sub1Pos=(int32_t)(p1-pattern);
        if (sub1Pos < sub0Pos) { /* a very odd pattern */
            int32_t t=sub0Pos; sub0Pos=sub1Pos; sub1Pos=t;
            langi=1;
        }
        if (u_strchr(pattern, 0xFF08) != NULL) {
            formatOpenParen         = 0xFF08; // fullwidth (
            formatReplaceOpenParen  = 0xFF3B; // fullwidth [
            formatCloseParen        = 0xFF09; // fullwidth )
            formatReplaceCloseParen = 0xFF3D; // fullwidth ]
        }
    }

    /* We loop here because there is one case in which after the first pass we could need to
     * reextract the data.  If there's initial padding before the first element, we put in
     * the padding and then write that element.  If it turns out there's no second element,
     * we didn't need the padding.  If we do need the data (no preflight), and the first element
     * would have fit but for the padding, we need to reextract.  In this case (only) we
     * adjust the parameters so padding is not added, and repeat.
     */
    do {
        UChar* p=dest;
        int32_t patPos=0; /* position in the pattern, used for non-substitution portions */
        int32_t langLen=0; /* length of language substitution */
        int32_t langPos=0; /* position in output of language substitution */
        int32_t restLen=0; /* length of 'everything else' substitution */
        int32_t restPos=0; /* position in output of 'everything else' substitution */
        UEnumeration* kenum = NULL; /* keyword enumeration */

        /* Every pass starts from the same assumptions. Without clearing
           retry the loop could never end once a retry was requested, and
           with stale haveLang/haveRest values the retry pass would discard
           the one component it is supposed to re-extract. On the first pass
           these assignments change nothing. */
        retry=FALSE;
        haveLang=TRUE;
        haveRest=TRUE;

        /* prefix of pattern, extremely likely to be empty */
        if(sub0Pos) {
            if(destCapacity >= sub0Pos) {
                while (patPos < sub0Pos) {
                    *p++ = pattern[patPos++];
                }
            } else {
                patPos=sub0Pos;
            }
            length=sub0Pos;
        } else {
            length=0;
        }

        for(int32_t subi=0,resti=0;subi<2;) { /* iterate through patterns 0 and 1*/
            UBool subdone = FALSE; /* set true when ready to move to next substitution */

            /* prep p and cap for calls to get display components, pin cap to 0 since
               they complain if cap is negative */
            int32_t cap=destCapacity-length;
            if (cap <= 0) {
                cap=0;
            } else {
                p=dest+length;
            }

            if (subi == langi) { /* {0}*/
                if(haveLang) {
                    langPos=length;
                    langLen=uloc_getDisplayLanguage(locale, displayLocale, p, cap, pErrorCode);
                    length+=langLen;
                    haveLang=langLen>0;
                }
                subdone=TRUE;
            } else { /* {1} */
                if(!haveRest) {
                    subdone=TRUE;
                } else {
                    int32_t len; /* length of component (plus other stuff) we just fetched */
                    switch(resti++) {
                        case 0:
                            restPos=length;
                            len=uloc_getDisplayScriptInContext(locale, displayLocale, p, cap, pErrorCode);
                            break;
                        case 1:
                            len=uloc_getDisplayCountry(locale, displayLocale, p, cap, pErrorCode);
                            break;
                        case 2:
                            len=uloc_getDisplayVariant(locale, displayLocale, p, cap, pErrorCode);
                            break;
                        case 3:
                            kenum = uloc_openKeywords(locale, pErrorCode);
                            /* fall through */
                        default: {
                            const char* kw=uenum_next(kenum, &len, pErrorCode);
                            if (kw == NULL) {
                                uenum_close(kenum);
                                len=0; /* mark that we didn't add a component */
                                subdone=TRUE;
                            } else {
                                /* incorporating this behavior into the loop made it even more complex,
                                   so just special case it here */
                                len = uloc_getDisplayKeyword(kw, displayLocale, p, cap, pErrorCode);
                                if(len) {
                                    if(len < cap) {
                                        p[len]=0x3d; /* '=', assume we'll need it */
                                    }
                                    len+=1;

                                    /* adjust for call to get keyword */
                                    cap-=len;
                                    if(cap <= 0) {
                                        cap=0;
                                    } else {
                                        p+=len;
                                    }
                                }
                                /* reset for call below */
                                if(*pErrorCode == U_BUFFER_OVERFLOW_ERROR) {
                                    *pErrorCode=U_ZERO_ERROR;
                                }
                                int32_t vlen = uloc_getDisplayKeywordValue(locale, kw, displayLocale,
                                                                           p, cap, pErrorCode);
                                if(len) {
                                    if(vlen==0) {
                                        --len; /* remove unneeded '=' */
                                    }
                                    /* restore cap and p to what they were at start */
                                    cap=destCapacity-length;
                                    if(cap <= 0) {
                                        cap=0;
                                    } else {
                                        p=dest+length;
                                    }
                                }
                                len+=vlen; /* total we added for key + '=' + value */
                            }
                        } break;
                    } /* end switch */

                    if (len>0) {
                        /* we added a component, so add separator and write it if there's room. */
                        if(len+sepLen<=cap) {
                            const UChar * plimit = p + len;
                            /* parentheses inside a component would clash with the
                               pattern's own, so swap them for brackets */
                            for (; p < plimit; p++) {
                                if (*p == formatOpenParen) {
                                    *p = formatReplaceOpenParen;
                                } else if (*p == formatCloseParen) {
                                    *p = formatReplaceCloseParen;
                                }
                            }
                            for(int32_t i=0;i<sepLen;++i) {
                                *p++=separator[i];
                            }
                        }
                        length+=len+sepLen;
                    } else if(subdone) {
                        /* remove separator if we added it */
                        if (length!=restPos) {
                            length-=sepLen;
                        }
                        restLen=length-restPos;
                        haveRest=restLen>0;
                    }
                }
            }

            /* overflow only means "keep counting"; the final terminate call
               reports it again if the total does not fit */
            if(*pErrorCode == U_BUFFER_OVERFLOW_ERROR) {
                *pErrorCode=U_ZERO_ERROR;
            }

            if(subdone) {
                if(haveLang && haveRest) {
                    /* append internal portion of pattern, the first time,
                       or last portion of pattern the second time */
                    int32_t padLen;
                    patPos+=subLen;
                    padLen=(subi==0 ? sub1Pos : patLen)-patPos;
                    /* "<=": pattern text that exactly fills the buffer must
                       still be written, otherwise a result reported as
                       fitting (unterminated) would lack e.g. its closing
                       parenthesis. */
                    if(length+padLen <= destCapacity) {
                        p=dest+length;
                        for(int32_t i=0;i<padLen;++i) {
                            *p++=pattern[patPos++];
                        }
                    } else {
                        patPos+=padLen;
                    }
                    length+=padLen;
                } else if(subi==0) {
                    /* don't have first component, reset for second component */
                    sub0Pos=0;
                    length=0;
                } else if(length>0) {
                    /* true length is the length of just the component we got. */
                    length=haveLang?langLen:restLen;
                    if(dest && sub0Pos!=0) {
                        if (sub0Pos+length<=destCapacity) {
                            /* first component not at start of result,
                               but we have full component in buffer. */
                            u_memmove(dest, dest+(haveLang?langPos:restPos), length);
                        } else {
                            /* would have fit, but didn't because of pattern prefix. */
                            sub0Pos=0; /* stops initial padding (and a second retry,
                                          so we won't end up here again) */
                            retry=TRUE;
                        }
                    }
                }

                ++subi; /* move on to next substitution */
            }
        }
    } while(retry);

    return u_terminateUChars(dest, destCapacity, length, pErrorCode);
}
|
||||
|
||||
/*
 * Writes to dest the display name of a locale keyword in the language of
 * displayLocale.
 *
 * Returns 0 without doing anything when status is NULL or already a failure,
 * and 0 with U_ILLEGAL_ARGUMENT_ERROR for a negative capacity or a NULL dest
 * with positive capacity. Otherwise the result is that of
 * _getStringOrCopyKey().
 */
U_CAPI int32_t U_EXPORT2
uloc_getDisplayKeyword(const char* keyword,
                       const char* displayLocale,
                       UChar* dest,
                       int32_t destCapacity,
                       UErrorCode* status){

    /* argument checking */
    if(status==NULL || U_FAILURE(*status)) {
        return 0;
    }

    const UBool usableBuffer = destCapacity==0 || (destCapacity>0 && dest!=NULL);
    if(!usableBuffer) {
        *status=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    /* The keyword is the item looked up in the "Keys" table (no sub-table),
       and it is also the substitute text used when no display name exists. */
    const int32_t nameLength = _getStringOrCopyKey(U_ICUDATA_LANG, displayLocale,
                                                   _kKeys, NULL,
                                                   keyword,
                                                   keyword,
                                                   dest, destCapacity,
                                                   status);
    return nameLength;
}
|
||||
|
||||
|
||||
#define UCURRENCY_DISPLAY_NAME_INDEX 1
|
||||
|
||||
/*
 * Writes to dest the display name of the value that `keyword` has in
 * `locale`, in the language of displayLocale.
 *
 * For the "currency" keyword the name comes from the "Currencies" table of
 * the currency data; for every other keyword it comes from the "Types"
 * table, sub-table `keyword`. When no display name exists, the raw keyword
 * value is copied instead and *status is set to U_USING_DEFAULT_WARNING.
 *
 * @param locale         locale ID carrying the keyword
 * @param keyword        keyword whose value is to be named
 * @param displayLocale  locale whose language the name is returned in
 * @param dest           output buffer, may be NULL only if destCapacity is 0
 * @param destCapacity   capacity of dest in UChars
 * @param status         in/out status; nothing is done if it is NULL or
 *                       already holds a failure
 * @return the length of the display name; 0 on failure
 */
U_CAPI int32_t U_EXPORT2
uloc_getDisplayKeywordValue(   const char* locale,
                               const char* keyword,
                               const char* displayLocale,
                               UChar* dest,
                               int32_t destCapacity,
                               UErrorCode* status){

    char keywordValue[ULOC_FULLNAME_CAPACITY*4];
    int32_t capacity = ULOC_FULLNAME_CAPACITY*4;
    int32_t keywordValueLen =0;

    /* argument checking */
    if(status==NULL || U_FAILURE(*status)) {
        return 0;
    }

    if(destCapacity<0 || (destCapacity>0 && dest==NULL)) {
        *status=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    /* get the keyword value */
    keywordValue[0]=0;
    keywordValueLen = uloc_getKeywordValue(locale, keyword, keywordValue, capacity, status);

    /* Stop if the value could not be extracted. Carrying on would let
       _getStringOrCopyKey() below turn the failure into
       U_USING_DEFAULT_WARNING and report success for an empty value. */
    if(U_FAILURE(*status)) {
        return 0;
    }

    /*
     * if the keyword is equal to currency .. then to get the display name
     * we need to do the fallback ourselves
     */
    if(uprv_stricmp(keyword, _kCurrency)==0){

        int32_t dispNameLen = 0;
        const UChar *dispName = NULL;

        UResourceBundle *bundle     = ures_open(U_ICUDATA_CURR, displayLocale, status);
        UResourceBundle *currencies = ures_getByKey(bundle, _kCurrencies, NULL, status);
        UResourceBundle *currency   = ures_getByKeyWithFallback(currencies, keywordValue, NULL, status);

        dispName = ures_getStringByIndex(currency, UCURRENCY_DISPLAY_NAME_INDEX, &dispNameLen, status);

        /*close the bundles */
        ures_close(currency);
        ures_close(currencies);
        ures_close(bundle);

        if(U_FAILURE(*status)){
            if(*status == U_MISSING_RESOURCE_ERROR){
                /* we just want to write the value over if nothing is available */
                *status = U_USING_DEFAULT_WARNING;
            }else{
                return 0;
            }
        }

        /* now copy the dispName over if not NULL */
        if(dispName != NULL){
            if(dispNameLen <= destCapacity){
                uprv_memcpy(dest, dispName, dispNameLen * U_SIZEOF_UCHAR);
                return u_terminateUChars(dest, destCapacity, dispNameLen, status);
            }else{
                /* nothing is copied on overflow; only the needed length is reported */
                *status = U_BUFFER_OVERFLOW_ERROR;
                return dispNameLen;
            }
        }else{
            /* we have not found the display name for the value .. just copy over */
            if(keywordValueLen <= destCapacity){
                u_charsToUChars(keywordValue, dest, keywordValueLen);
                return u_terminateUChars(dest, destCapacity, keywordValueLen, status);
            }else{
                *status = U_BUFFER_OVERFLOW_ERROR;
                return keywordValueLen;
            }
        }

    }else{

        /* the value is both the item to look up and the substitute text */
        return _getStringOrCopyKey(U_ICUDATA_LANG, displayLocale,
                                   _kTypes, keyword,
                                   keywordValue,
                                   keywordValue,
                                   dest, destCapacity,
                                   status);
    }
}
|
||||
1047
UnicodeConverter/icubuilds-mac/icu/icu/common/locid.cpp
Normal file
1047
UnicodeConverter/icubuilds-mac/icu/icu/common/locid.cpp
Normal file
File diff suppressed because it is too large
Load Diff
1333
UnicodeConverter/icubuilds-mac/icu/icu/common/loclikely.cpp
Normal file
1333
UnicodeConverter/icubuilds-mac/icu/icu/common/loclikely.cpp
Normal file
File diff suppressed because it is too large
Load Diff
1135
UnicodeConverter/icubuilds-mac/icu/icu/common/locmap.c
Normal file
1135
UnicodeConverter/icubuilds-mac/icu/icu/common/locmap.c
Normal file
File diff suppressed because it is too large
Load Diff
37
UnicodeConverter/icubuilds-mac/icu/icu/common/locmap.h
Normal file
37
UnicodeConverter/icubuilds-mac/icu/icu/common/locmap.h
Normal file
@ -0,0 +1,37 @@
|
||||
/*
|
||||
******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1996-2013, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
******************************************************************************
|
||||
*
|
||||
* File locmap.h : Locale Mapping Classes
|
||||
*
|
||||
*
|
||||
* Created by: Helena Shih
|
||||
*
|
||||
* Modification History:
|
||||
*
|
||||
* Date Name Description
|
||||
* 3/11/97 aliu Added setId().
|
||||
* 4/20/99 Madhu Added T_convertToPosix()
|
||||
* 09/18/00 george Removed the memory leaks.
|
||||
* 08/23/01 george Convert to C
|
||||
*============================================================================
|
||||
*/
|
||||
|
||||
#ifndef LOCMAP_H
|
||||
#define LOCMAP_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
/* Masks a host ID down to its low 10 bits (0x03FF) and yields them as a
   uint16_t; presumably the primary-language part of a Windows LCID (confirm
   against locmap.c). The argument is parenthesized so that an expression such
   as (a | b) is masked as a whole. */
#define LANGUAGE_LCID(hostID) ((uint16_t)(0x03FF & (hostID)))
|
||||
|
||||
/* Declared here, defined elsewhere (not visible in this header).
   NOTE(review): judging by the names, this converts a host locale ID (hostid)
   into a POSIX locale ID written to posixID (capacity posixIDCapacity) and
   reports problems through status. Confirm the return value and the
   termination behaviour against the definition in locmap.c. */
U_CAPI int32_t uprv_convertToPosix(uint32_t hostid, char* posixID, int32_t posixIDCapacity, UErrorCode* status);
|
||||
|
||||
/* Don't call this function directly. Use uloc_getLCID instead. */
|
||||
/* Declared here, defined elsewhere (not visible in this header).
   NOTE(review): judging by the names, this maps a language ID plus a POSIX
   locale ID to a host LCID and reports problems through status. Confirm
   against the definition in locmap.c. */
U_CAPI uint32_t uprv_convertToLCID(const char *langID, const char* posixID, UErrorCode* status);
|
||||
|
||||
#endif /* LOCMAP_H */
|
||||
|
||||
223
UnicodeConverter/icubuilds-mac/icu/icu/common/locresdata.cpp
Normal file
223
UnicodeConverter/icubuilds-mac/icu/icu/common/locresdata.cpp
Normal file
@ -0,0 +1,223 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1997-2012, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: locresdata.cpp
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2010feb25
|
||||
* created by: Markus W. Scherer
|
||||
*
|
||||
* Code for miscellaneous locale-related resource bundle data access,
|
||||
* separated out from other .cpp files
|
||||
* that then do not depend on resource bundle code and this data.
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/putil.h"
|
||||
#include "unicode/uloc.h"
|
||||
#include "unicode/ures.h"
|
||||
#include "cstring.h"
|
||||
#include "ulocimp.h"
|
||||
#include "uresimp.h"
|
||||
|
||||
/*
|
||||
* Lookup a resource bundle table item with fallback on the table level.
|
||||
* Regular resource bundle lookups perform fallback to parent locale bundles
|
||||
* and eventually the root bundle, but only for top-level items.
|
||||
* This function takes the name of a top-level table and of an item in that table
|
||||
* and performs a lookup of both, falling back until a bundle contains a table
|
||||
* with this item.
|
||||
*
|
||||
* Note: Only the opening of entire bundles falls back through the default locale
|
||||
* before root. Once a bundle is open, item lookups do not go through the
|
||||
* default locale because that would result in a mix of languages that is
|
||||
* unpredictable to the programmer and most likely useless.
|
||||
*/
|
||||
/*
 * @param path         resource bundle package/tree path (may be NULL)
 * @param locale       locale whose bundle is opened first
 * @param tableKey     name of the top-level table
 * @param subTableKey  name of a table inside tableKey, or NULL
 * @param itemKey      key of the string inside the (sub-)table
 * @param pLength      receives the length of the returned string
 * @param pErrorCode   receives warnings and failures of the lookup
 * @return the string, or NULL if no bundle could be opened; the pointer
 *         refers to resource data and is not owned by the caller
 */
U_CAPI const UChar * U_EXPORT2
uloc_getTableStringWithFallback(const char *path, const char *locale,
                              const char *tableKey, const char *subTableKey,
                              const char *itemKey,
                              int32_t *pLength,
                              UErrorCode *pErrorCode)
{
    UResourceBundle *rb=NULL, table;
    const UChar *item=NULL;
    UErrorCode errorCode;
    char explicitFallbackName[ULOC_FULLNAME_CAPACITY] = {0};

    /*
     * open the bundle for the current locale
     * this falls back through the locale's chain to root
     */
    errorCode=U_ZERO_ERROR;
    rb=ures_open(path, locale, &errorCode);

    if(U_FAILURE(errorCode)) {
        /* total failure, not even root could be opened */
        *pErrorCode=errorCode;
        return NULL;
    } else if(errorCode==U_USING_DEFAULT_WARNING ||
                (errorCode==U_USING_FALLBACK_WARNING && *pErrorCode!=U_USING_DEFAULT_WARNING)
    ) {
        /* set the "strongest" error code (success->fallback->default->failure) */
        *pErrorCode=errorCode;
    }

    /* Initialised once. It is closed and re-initialised explicitly before
       each further pass, so that the bundle filled in by the previous pass
       is released instead of being wiped and leaked. */
    ures_initStackObject(&table);

    for(;;){
        ures_getByKeyWithFallback(rb, tableKey, &table, &errorCode);

        if (subTableKey != NULL) {
            /* descend in place: table is both the source and the fill-in */
            ures_getByKeyWithFallback(&table,subTableKey, &table, &errorCode);
        }
        if(U_SUCCESS(errorCode)){
            item = ures_getStringByKeyWithFallback(&table, itemKey, pLength, &errorCode);
            if(U_FAILURE(errorCode)){
                const char* replacement = NULL;
                *pErrorCode = errorCode; /*save the errorCode*/
                errorCode = U_ZERO_ERROR;
                /* may be a deprecated code */
                if(uprv_strcmp(tableKey, "Countries")==0){
                    replacement =  uloc_getCurrentCountryID(itemKey);
                }else if(uprv_strcmp(tableKey, "Languages")==0){
                    replacement =  uloc_getCurrentLanguageID(itemKey);
                }
                /*pointer comparison is ok since uloc_getCurrentCountryID & uloc_getCurrentLanguageID return the key itself if replacement is not found*/
                if(replacement!=NULL && itemKey != replacement){
                    item = ures_getStringByKeyWithFallback(&table, replacement, pLength, &errorCode);
                    if(U_SUCCESS(errorCode)){
                        *pErrorCode = errorCode;
                        break;
                    }
                }
            }else{
                break;
            }
        }

        if(U_FAILURE(errorCode)){

            /* still can't figure out ?.. try the fallback mechanism */
            int32_t len = 0;
            const UChar* fallbackLocale =  NULL;
            *pErrorCode = errorCode;
            errorCode = U_ZERO_ERROR;

            fallbackLocale = ures_getStringByKeyWithFallback(&table, "Fallback", &len, &errorCode);
            if(U_FAILURE(errorCode)){
               *pErrorCode = errorCode;
                break;
            }

            /* The name must fit the fixed buffer together with its NUL;
               anything longer is treated as broken data. */
            if(len < 0 || len >= (int32_t)sizeof(explicitFallbackName)){
                *pErrorCode = U_INTERNAL_PROGRAM_ERROR;
                break;
            }
            u_UCharsToChars(fallbackLocale, explicitFallbackName, len);
            /* terminate explicitly: a shorter name on a later pass must not
               keep the tail of a longer one */
            explicitFallbackName[len] = 0;

            /* guard against recursive fallback */
            if(uprv_strcmp(explicitFallbackName, locale)==0){
                *pErrorCode = U_INTERNAL_PROGRAM_ERROR;
                break;
            }

            /* The fallback name has been copied, so this pass's table can be
               released. It is re-initialised right away so that the final
               ures_close(&table) stays valid on every exit path. */
            ures_close(&table);
            ures_initStackObject(&table);

            ures_close(rb);
            rb = ures_open(path, explicitFallbackName, &errorCode);
            if(U_FAILURE(errorCode)){
                *pErrorCode = errorCode;
                break;
            }
            /* succeeded in opening the fallback bundle .. continue and try to fetch the item */
        }else{
            break;
        }
    }
    /* done with the locale string - ready to close table and rb */
    ures_close(&table);
    ures_close(rb);
    return item;
}
|
||||
|
||||
/*
 * Reads item `key` of the "layout" table for the canonicalized localeId and
 * maps the first character of its value to a ULayoutType:
 * 'b' -> BTT, 'l' -> LTR, 'r' -> RTL, 't' -> TTB.
 *
 * Returns ULOC_LAYOUT_UNKNOWN when *status is or becomes a failure, or when
 * the value is empty. Any other first character also yields
 * ULOC_LAYOUT_UNKNOWN and sets *status to U_INTERNAL_PROGRAM_ERROR.
 */
static ULayoutType
_uloc_getOrientationHelper(const char* localeId,
                           const char* key,
                           UErrorCode *status)
{
    if (U_FAILURE(*status)) {
        return ULOC_LAYOUT_UNKNOWN;
    }

    char canonicalId[ULOC_FULLNAME_CAPACITY];
    uloc_canonicalize(localeId, canonicalId, sizeof(canonicalId), status);
    if (U_FAILURE(*status)) {
        return ULOC_LAYOUT_UNKNOWN;
    }

    int32_t valueLength = 0;
    const UChar* const value =
        uloc_getTableStringWithFallback(
            NULL,
            canonicalId,
            "layout",
            NULL,
            key,
            &valueLength,
            status);
    if (U_FAILURE(*status) || valueLength == 0) {
        return ULOC_LAYOUT_UNKNOWN;
    }

    const UChar initial = value[0];
    if (initial == 0x0062) { /* 'b' */
        return ULOC_LAYOUT_BTT;
    }
    if (initial == 0x006C) { /* 'l' */
        return ULOC_LAYOUT_LTR;
    }
    if (initial == 0x0072) { /* 'r' */
        return ULOC_LAYOUT_RTL;
    }
    if (initial == 0x0074) { /* 't' */
        return ULOC_LAYOUT_TTB;
    }

    /* unexpected data in the layout table */
    *status = U_INTERNAL_PROGRAM_ERROR;
    return ULOC_LAYOUT_UNKNOWN;
}
|
||||
|
||||
/**
 * Get the layout character orientation for the specified locale.
 *
 * @param localeId locale name
 * @param status Error status
 * @return the value of the "characters" item of the locale's layout data,
 *         as mapped by _uloc_getOrientationHelper().
 */
U_CAPI ULayoutType U_EXPORT2
uloc_getCharacterOrientation(const char* localeId,
                             UErrorCode *status)
{
    const ULayoutType orientation = _uloc_getOrientationHelper(localeId, "characters", status);
    return orientation;
}
|
||||
|
||||
/**
|
||||
* Get the layout line orientation for the specified locale.
|
||||
*
|
||||
* @param localeID locale name
|
||||
* @param status Error status
|
||||
* @return an enum indicating the layout orientation for lines.
|
||||
*/
|
||||
U_CAPI ULayoutType U_EXPORT2
|
||||
uloc_getLineOrientation(const char* localeId,
|
||||
UErrorCode *status)
|
||||
{
|
||||
return _uloc_getOrientationHelper(localeId, "lines", status);
|
||||
}
|
||||
273
UnicodeConverter/icubuilds-mac/icu/icu/common/locutil.cpp
Normal file
273
UnicodeConverter/icubuilds-mac/icu/icu/common/locutil.cpp
Normal file
@ -0,0 +1,273 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2002-2014, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
*/
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_SERVICE || !UCONFIG_NO_TRANSLITERATION
|
||||
|
||||
#include "unicode/resbund.h"
|
||||
#include "cmemory.h"
|
||||
#include "ustrfmt.h"
|
||||
#include "locutil.h"
|
||||
#include "charstr.h"
|
||||
#include "ucln_cmn.h"
|
||||
#include "uassert.h"
|
||||
#include "umutex.h"
|
||||
|
||||
// see LocaleUtility::getAvailableLocaleNames
|
||||
static icu::UInitOnce LocaleUtilityInitOnce = U_INITONCE_INITIALIZER;
|
||||
static icu::Hashtable * LocaleUtility_cache = NULL;
|
||||
|
||||
#define UNDERSCORE_CHAR ((UChar)0x005f)
|
||||
#define AT_SIGN_CHAR ((UChar)64)
|
||||
#define PERIOD_CHAR ((UChar)46)
|
||||
|
||||
/*
|
||||
******************************************************************
|
||||
*/
|
||||
|
||||
/**
|
||||
* Release all static memory held by Locale Utility.
|
||||
*/
|
||||
U_CDECL_BEGIN
|
||||
// Cleanup callback: destroys the locale-name cache (if any) and clears the
// global pointer. Always reports success.
//
// NOTE(review): LocaleUtilityInitOnce is not touched here, so after this
// callback runs the one-time initializer will presumably not run again --
// confirm whether re-initialization after cleanup is expected.
static UBool U_CALLCONV service_cleanup(void) {
    // Deleting a null pointer is a no-op, so no explicit guard is required.
    delete LocaleUtility_cache;
    LocaleUtility_cache = NULL;
    return TRUE;
}
|
||||
|
||||
|
||||
// One-time initializer for LocaleUtility_cache (invoked via umtx_initOnce).
// Registers service_cleanup, then creates the top-level hashtable whose
// values are themselves hashtables (deleted with uhash_deleteHashtable).
// On any failure the cache pointer is left NULL and status carries the error.
static void U_CALLCONV locale_utility_init(UErrorCode &status) {
    using namespace icu;

    U_ASSERT(LocaleUtility_cache == NULL);
    ucln_common_registerCleanup(UCLN_COMMON_SERVICE, service_cleanup);

    Hashtable *table = new Hashtable(status);
    if (U_FAILURE(status)) {
        // Construction failed (or status was already bad): discard the table.
        delete table;
        LocaleUtility_cache = NULL;
        return;
    }
    if (table == NULL) {
        LocaleUtility_cache = NULL;
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }

    table->setValueDeleter(uhash_deleteHashtable);
    LocaleUtility_cache = table;
}
|
||||
|
||||
U_CDECL_END
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/**
 * Copies *id into result and fixes the letter case of its leading part only:
 * ASCII letters before the first '_' are lower-cased, and ASCII letters from
 * there up to the first '@' or '.' (whichever comes first, otherwise the end
 * of the string) are upper-cased. Text from that '@' / '.' onward is left
 * untouched.
 *
 * @param id     locale ID to canonicalize; may be NULL
 * @param result receives the canonicalized ID; set to bogus when id is NULL
 * @return result
 */
UnicodeString&
LocaleUtility::canonicalLocaleString(const UnicodeString* id, UnicodeString& result)
{
    if (id == NULL) {
        result.setToBogus();
        return result;
    }

    // Fix case only (no other changes) up to the first '@' or '.' or
    // end of string, whichever comes first. Historical note [alan 3.0]:
    // this used to run out to the end of the string; stopping early makes
    // the tests pass but may be structurally incorrect (see the disabled
    // full canonicalization below).
    result = *id;

    const int32_t atPos = result.indexOf(AT_SIGN_CHAR);
    const int32_t dotPos = result.indexOf(PERIOD_CHAR);

    // end = position of the earlier of '@' and '.', or the string length.
    // The previous code compared dotPos against atPos even when atPos was -1,
    // so a '.' was ignored whenever the ID contained no '@'.
    int32_t end;
    if (atPos >= 0 && dotPos >= 0) {
        end = (dotPos < atPos) ? dotPos : atPos;
    } else if (atPos >= 0) {
        end = atPos;
    } else if (dotPos >= 0) {
        end = dotPos;
    } else {
        end = result.length();
    }

    // The lower-cased (language) part stops at the first '_'. An '_' that
    // lies beyond 'end' belongs to the untouched suffix, so clamp to 'end'
    // to keep the lower-casing loop from running past the '@' / '.'.
    int32_t langEnd = result.indexOf(UNDERSCORE_CHAR);
    if (langEnd < 0 || langEnd > end) {
        langEnd = end;
    }

    int32_t i = 0;
    for (; i < langEnd; ++i) {
        UChar c = result.charAt(i);
        if (c >= 0x0041 && c <= 0x005a) {   // 'A'..'Z' -> 'a'..'z'
            c += 0x20;
            result.setCharAt(i, c);
        }
    }
    for (; i < end; ++i) {
        UChar c = result.charAt(i);
        if (c >= 0x0061 && c <= 0x007a) {   // 'a'..'z' -> 'A'..'Z'
            c -= 0x20;
            result.setCharAt(i, c);
        }
    }
    return result;

#if 0
    // This code does a proper full level 2 canonicalization of id.
    // It's nasty to go from UChar to char to char to UChar -- but
    // that's what you have to do to use the uloc_canonicalize
    // function on UnicodeStrings.

    // I ended up doing the alternate fix (see above) not for
    // performance reasons, although performance will certainly be
    // better, but because doing a full level 2 canonicalization
    // causes some tests to fail.  [alan 3.0]

    // TODO: Doug, you might want to revisit this...
    result.setToBogus();
    if (id != 0) {
        int32_t buflen = id->length() + 8; // space for NUL
        char* buf = (char*) uprv_malloc(buflen);
        char* canon = (buf == 0) ? 0 : (char*) uprv_malloc(buflen);
        if (buf != 0 && canon != 0) {
            U_ASSERT(id->extract(0, INT32_MAX, buf, buflen) < buflen);
            UErrorCode ec = U_ZERO_ERROR;
            uloc_canonicalize(buf, canon, buflen, &ec);
            if (U_SUCCESS(ec)) {
                result = UnicodeString(canon);
            }
        }
        uprv_free(buf);
        uprv_free(canon);
    }
    return result;
#endif
}
|
||||
|
||||
/**
 * Builds a Locale from a UnicodeString locale ID.
 *
 * @param id     locale ID; a bogus string or one of BUFLEN (128) or more
 *               UChars yields a bogus Locale
 * @param result receives the Locale created from the converted ID
 * @return result
 */
Locale&
LocaleUtility::initLocaleFromName(const UnicodeString& id, Locale& result)
{
    enum { BUFLEN = 128 };  // larger than ever needed

    if (id.isBogus() || id.length() >= BUFLEN) {
        result.setToBogus();
        return result;
    }

    /*
     * A Locale is created from a char *, so the UnicodeString has to be
     * converted first.
     *
     * Problem: locale ID strings may contain '@', which is a variant
     * character and cannot be handled by invariant-character conversion.
     *
     * Hack: since ICU code can handle locale IDs with multiple encodings
     * of '@' (at least for EBCDIC; it's not known to be a problem for
     * ASCII-based systems), regular invariant-character conversion is used
     * for everything else and U+0040 is manually converted into the
     * compiler-char-constant '@'. While this compilation-time constant may
     * not match the runtime encoding of '@', it should be one of the
     * encodings which ICU recognizes.
     *
     * There should be only at most one '@' in a locale ID.
     */
    char buffer[BUFLEN];
    int32_t segStart = 0;
    int32_t atPos = id.indexOf(AT_SIGN_CHAR, segStart);
    while (atPos >= 0) {
        // normal invariant-character conversion for the text before this '@'
        id.extract(segStart, atPos - segStart, buffer + segStart, BUFLEN - segStart, US_INV);
        // manually "convert" U+0040 at id[atPos] into '@' at buffer[atPos]
        buffer[atPos] = '@';
        segStart = atPos + 1;
        atPos = id.indexOf(AT_SIGN_CHAR, segStart);
    }
    // no further '@': convert the remainder of the string
    id.extract(segStart, INT32_MAX, buffer + segStart, BUFLEN - segStart, US_INV);

    result = Locale::createFromName(buffer);
    return result;
}
|
||||
|
||||
/**
 * Writes the name of a Locale into a UnicodeString.
 *
 * Note that the name is appended to whatever result already holds; result
 * is not cleared first.
 *
 * @param locale source locale; a bogus locale makes result bogus
 * @param result string that receives (is appended with) locale.getName()
 * @return result
 */
UnicodeString&
LocaleUtility::initNameFromLocale(const Locale& locale, UnicodeString& result)
{
    if (locale.isBogus()) {
        result.setToBogus();
        return result;
    }
    // Invariant-character conversion of the NUL-terminated locale name.
    const UnicodeString name(locale.getName(), -1, US_INV);
    result.append(name);
    return result;
}
|
||||
|
||||
/**
 * Returns the set of locale names available for a resource bundle path,
 * building and caching it on first use.
 *
 * LocaleUtility_cache is a hash-of-hashes. The top-level keys are path
 * strings ('bundleID') passed to ures_openAvailableLocales. The top-level
 * values are second-level hashes. The second-level keys are result strings
 * from ures_openAvailableLocales. The second-level values are garbage
 * (any non-NULL pointer); only key presence matters.
 *
 * @param bundleID bundle path; an empty string is passed on as a NULL path
 * @return the cached table of locale names, or NULL on any failure
 */
const Hashtable*
LocaleUtility::getAvailableLocaleNames(const UnicodeString& bundleID)
{
    UErrorCode status = U_ZERO_ERROR;
    umtx_initOnce(LocaleUtilityInitOnce, locale_utility_init, status);
    Hashtable *cache = LocaleUtility_cache;
    if (cache == NULL) {
        // Catastrophic failure.
        return NULL;
    }

    Hashtable* htp;
    umtx_lock(NULL);
    htp = (Hashtable*) cache->get(bundleID);
    umtx_unlock(NULL);
    if (htp != NULL) {
        return htp;
    }

    htp = new Hashtable(status);
    if (htp == NULL) {
        return NULL;
    }
    if (U_FAILURE(status)) {
        // The table was allocated but failed to initialize. Previously it was
        // leaked and handed back to the caller unpopulated.
        delete htp;
        return NULL;
    }

    // Populate the new table outside the lock.
    CharString cbundleID;
    cbundleID.appendInvariantChars(bundleID, status);
    const char* path = cbundleID.isEmpty() ? NULL : cbundleID.data();
    UEnumeration *uenum = ures_openAvailableLocales(path, &status);
    for (;;) {
        const UChar* id = uenum_unext(uenum, NULL, &status);
        if (id == NULL) {
            break;
        }
        htp->put(UnicodeString(id), (void*)htp, status);
    }
    uenum_close(uenum);
    if (U_FAILURE(status)) {
        delete htp;
        return NULL;
    }

    umtx_lock(NULL);
    Hashtable *t = static_cast<Hashtable *>(cache->get(bundleID));
    if (t != NULL) {
        // Another thread raced through this code, creating the cache entry first.
        // Discard ours and return theirs.
        umtx_unlock(NULL);
        delete htp;
        htp = t;
    } else {
        cache->put(bundleID, (void*)htp, status);
        umtx_unlock(NULL);
        if (U_FAILURE(status)) {
            // The entry was not stored. The cache has a value deleter
            // (uhash_deleteHashtable), so a failed put has presumably already
            // destroyed htp -- TODO confirm against the uhash put contract.
            // Either way the pointer must not be returned as a cached table.
            htp = NULL;
        }
    }
    return htp;
}
|
||||
|
||||
/**
 * Tests whether 'root' is a fallback of 'child': child must begin with root
 * and either be the same length or continue with '_' immediately after it.
 *
 * @param root  candidate fallback locale ID
 * @param child locale ID to test against
 * @return TRUE if the condition above holds, FALSE otherwise
 */
UBool
LocaleUtility::isFallbackOf(const UnicodeString& root, const UnicodeString& child)
{
    if (child.indexOf(root) != 0) {
        return FALSE;
    }
    if (child.length() == root.length()) {
        return TRUE;
    }
    return child.charAt(root.length()) == UNDERSCORE_CHAR;
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
/* !UCONFIG_NO_SERVICE */
|
||||
#endif
|
||||
|
||||
|
||||
37
UnicodeConverter/icubuilds-mac/icu/icu/common/locutil.h
Normal file
37
UnicodeConverter/icubuilds-mac/icu/icu/common/locutil.h
Normal file
@ -0,0 +1,37 @@
|
||||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2002-2005, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
#ifndef LOCUTIL_H
|
||||
#define LOCUTIL_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "hash.h"
|
||||
|
||||
#if !UCONFIG_NO_SERVICE || !UCONFIG_NO_TRANSLITERATION
|
||||
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
// temporary utility functions, till I know where to find them
|
||||
// in header so tests can also access them
|
||||
|
||||
class U_COMMON_API LocaleUtility {
|
||||
public:
|
||||
static UnicodeString& canonicalLocaleString(const UnicodeString* id, UnicodeString& result);
|
||||
static Locale& initLocaleFromName(const UnicodeString& id, Locale& result);
|
||||
static UnicodeString& initNameFromLocale(const Locale& locale, UnicodeString& result);
|
||||
static const Hashtable* getAvailableLocaleNames(const UnicodeString& bundleID);
|
||||
static UBool isFallbackOf(const UnicodeString& root, const UnicodeString& child);
|
||||
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
63
UnicodeConverter/icubuilds-mac/icu/icu/common/messageimpl.h
Normal file
63
UnicodeConverter/icubuilds-mac/icu/icu/common/messageimpl.h
Normal file
@ -0,0 +1,63 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
* file name: messageimpl.h
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2011apr04
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
#ifndef __MESSAGEIMPL_H__
|
||||
#define __MESSAGEIMPL_H__
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_FORMATTING
|
||||
|
||||
#include "unicode/messagepattern.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/**
|
||||
* Helper functions for use of MessagePattern.
|
||||
* In Java, these are package-private methods in MessagePattern itself.
|
||||
* In C++, they are declared here and implemented in messagepattern.cpp.
|
||||
*/
|
||||
class U_COMMON_API MessageImpl {
|
||||
public:
|
||||
/**
|
||||
* @return TRUE if getApostropheMode()==UMSGPAT_APOS_DOUBLE_REQUIRED
|
||||
*/
|
||||
static UBool jdkAposMode(const MessagePattern &msgPattern) {
|
||||
return msgPattern.getApostropheMode()==UMSGPAT_APOS_DOUBLE_REQUIRED;
|
||||
}
|
||||
|
||||
/**
|
||||
* Appends the s[start, limit[ substring to sb, but with only half of the apostrophes
|
||||
* according to JDK pattern behavior.
|
||||
*/
|
||||
static void appendReducedApostrophes(const UnicodeString &s, int32_t start, int32_t limit,
|
||||
UnicodeString &sb);
|
||||
|
||||
/**
|
||||
* Appends the sub-message to the result string.
|
||||
* Omits SKIP_SYNTAX and appends whole arguments using appendReducedApostrophes().
|
||||
*/
|
||||
static UnicodeString &appendSubMessageWithoutSkipSyntax(const MessagePattern &msgPattern,
|
||||
int32_t msgStart,
|
||||
UnicodeString &result);
|
||||
|
||||
private:
|
||||
MessageImpl(); // no constructor: all static methods
|
||||
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif // !UCONFIG_NO_FORMATTING
|
||||
|
||||
#endif // __MESSAGEIMPL_H__
|
||||
1231
UnicodeConverter/icubuilds-mac/icu/icu/common/messagepattern.cpp
Normal file
1231
UnicodeConverter/icubuilds-mac/icu/icu/common/messagepattern.cpp
Normal file
File diff suppressed because it is too large
Load Diff
23
UnicodeConverter/icubuilds-mac/icu/icu/common/msvcres.h
Normal file
23
UnicodeConverter/icubuilds-mac/icu/icu/common/msvcres.h
Normal file
@ -0,0 +1,23 @@
|
||||
//{{NO_DEPENDENCIES}}
|
||||
// Copyright (c) 2003-2010 International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//
|
||||
// Used by common.rc and other .rc files.
|
||||
//Do not edit with Microsoft Developer Studio because it will modify this
|
||||
//header the wrong way. This is here to prevent Visual Studio .NET from
|
||||
//unnecessarily building the resource files when they are not needed.
|
||||
//
|
||||
|
||||
/*
|
||||
These are defined before unicode/uversion.h in order to prevent
|
||||
STLPort's broken stddef.h from being used when rc.exe parses this file.
|
||||
*/
|
||||
#define _STLP_OUTERMOST_HEADER_ID 0
|
||||
#define _STLP_WINCE 1
|
||||
|
||||
#include "unicode/uversion.h"
|
||||
|
||||
#define ICU_WEBSITE "http://icu-project.org"
|
||||
#define ICU_COMPANY "The ICU Project"
|
||||
#define ICU_PRODUCT_PREFIX "ICU"
|
||||
#define ICU_PRODUCT "International Components for Unicode"
|
||||
77
UnicodeConverter/icubuilds-mac/icu/icu/common/mutex.h
Normal file
77
UnicodeConverter/icubuilds-mac/icu/icu/common/mutex.h
Normal file
@ -0,0 +1,77 @@
|
||||
/*
|
||||
******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1997-2013, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
******************************************************************************
|
||||
*/
|
||||
//----------------------------------------------------------------------------
|
||||
// File: mutex.h
|
||||
//
|
||||
// Lightweight C++ wrapper for umtx_ C mutex functions
|
||||
//
|
||||
// Author: Alan Liu 1/31/97
|
||||
// History:
|
||||
// 06/04/97 helena Updated setImplementation as per feedback from 5/21 drop.
|
||||
// 04/07/1999 srl refocused as a thin wrapper
|
||||
//
|
||||
//----------------------------------------------------------------------------
|
||||
#ifndef MUTEX_H
|
||||
#define MUTEX_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/uobject.h"
|
||||
#include "umutex.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
// Code within that accesses shared static or global data should
|
||||
// instantiate a Mutex object while doing so. You should make your own
|
||||
// private mutex where possible.
|
||||
|
||||
// For example:
|
||||
//
|
||||
// UMutex myMutex;
|
||||
//
|
||||
// void Function(int arg1, int arg2)
|
||||
// {
|
||||
// static Object* foo; // Shared read-write object
|
||||
// Mutex mutex(&myMutex); // or no args for the global lock
|
||||
// foo->Method();
|
||||
// // When 'mutex' goes out of scope and gets destroyed here, the lock is released
|
||||
// }
|
||||
//
|
||||
// Note: Do NOT use the form 'Mutex mutex();' as that merely forward-declares a function
|
||||
// returning a Mutex. This is a common mistake which silently slips through the
|
||||
// compiler!!
|
||||
//
|
||||
|
||||
/**
 * Scope guard around the umtx_ C mutex functions: the constructor calls
 * umtx_lock and the destructor calls umtx_unlock on the same UMutex pointer.
 * The pointer defaults to NULL. Instances cannot be copied.
 */
class U_COMMON_API Mutex : public UMemory {
public:
    /** Locks the given mutex (NULL by default) for the lifetime of this object. */
    inline Mutex(UMutex *mutex = NULL);

    /** Unlocks the mutex that was locked by the constructor. */
    inline ~Mutex();

private:
    UMutex *fMutex;  // the mutex passed to the constructor; may be NULL

    // Copying is forbidden: declared private and never defined here.
    Mutex(const Mutex &other);
    Mutex &operator=(const Mutex &other);
};
|
||||
|
||||
// Remembers the mutex pointer and locks it immediately.
inline Mutex::Mutex(UMutex *mutex)
    : fMutex(mutex)
{
    umtx_lock(mutex);
}
|
||||
|
||||
// Releases the lock taken in the constructor.
inline Mutex::~Mutex()
{
    umtx_unlock(this->fMutex);
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif //_MUTEX_
|
||||
//eof
|
||||
1126
UnicodeConverter/icubuilds-mac/icu/icu/common/norm2_nfc_data.h
Normal file
1126
UnicodeConverter/icubuilds-mac/icu/icu/common/norm2_nfc_data.h
Normal file
File diff suppressed because it is too large
Load Diff
341
UnicodeConverter/icubuilds-mac/icu/icu/common/norm2allmodes.h
Normal file
341
UnicodeConverter/icubuilds-mac/icu/icu/common/norm2allmodes.h
Normal file
@ -0,0 +1,341 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2014, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
* norm2allmodes.h
|
||||
*
|
||||
* created on: 2014sep07
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
#ifndef __NORM2ALLMODES_H__
|
||||
#define __NORM2ALLMODES_H__
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_NORMALIZATION
|
||||
|
||||
#include "unicode/normalizer2.h"
|
||||
#include "unicode/unistr.h"
|
||||
#include "cpputils.h"
|
||||
#include "normalizer2impl.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
// Intermediate class:
|
||||
// Has Normalizer2Impl and does boilerplate argument checking and setup.
|
||||
class Normalizer2WithImpl : public Normalizer2 {
|
||||
public:
|
||||
Normalizer2WithImpl(const Normalizer2Impl &ni) : impl(ni) {}
|
||||
virtual ~Normalizer2WithImpl();
|
||||
|
||||
// normalize
|
||||
virtual UnicodeString &
|
||||
normalize(const UnicodeString &src,
|
||||
UnicodeString &dest,
|
||||
UErrorCode &errorCode) const {
|
||||
if(U_FAILURE(errorCode)) {
|
||||
dest.setToBogus();
|
||||
return dest;
|
||||
}
|
||||
const UChar *sArray=src.getBuffer();
|
||||
if(&dest==&src || sArray==NULL) {
|
||||
errorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
||||
dest.setToBogus();
|
||||
return dest;
|
||||
}
|
||||
dest.remove();
|
||||
ReorderingBuffer buffer(impl, dest);
|
||||
if(buffer.init(src.length(), errorCode)) {
|
||||
normalize(sArray, sArray+src.length(), buffer, errorCode);
|
||||
}
|
||||
return dest;
|
||||
}
|
||||
virtual void
|
||||
normalize(const UChar *src, const UChar *limit,
|
||||
ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
|
||||
|
||||
// normalize and append
|
||||
virtual UnicodeString &
|
||||
normalizeSecondAndAppend(UnicodeString &first,
|
||||
const UnicodeString &second,
|
||||
UErrorCode &errorCode) const {
|
||||
return normalizeSecondAndAppend(first, second, TRUE, errorCode);
|
||||
}
|
||||
virtual UnicodeString &
|
||||
append(UnicodeString &first,
|
||||
const UnicodeString &second,
|
||||
UErrorCode &errorCode) const {
|
||||
return normalizeSecondAndAppend(first, second, FALSE, errorCode);
|
||||
}
|
||||
UnicodeString &
|
||||
normalizeSecondAndAppend(UnicodeString &first,
|
||||
const UnicodeString &second,
|
||||
UBool doNormalize,
|
||||
UErrorCode &errorCode) const {
|
||||
uprv_checkCanGetBuffer(first, errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return first;
|
||||
}
|
||||
const UChar *secondArray=second.getBuffer();
|
||||
if(&first==&second || secondArray==NULL) {
|
||||
errorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return first;
|
||||
}
|
||||
int32_t firstLength=first.length();
|
||||
UnicodeString safeMiddle;
|
||||
{
|
||||
ReorderingBuffer buffer(impl, first);
|
||||
if(buffer.init(firstLength+second.length(), errorCode)) {
|
||||
normalizeAndAppend(secondArray, secondArray+second.length(), doNormalize,
|
||||
safeMiddle, buffer, errorCode);
|
||||
}
|
||||
} // The ReorderingBuffer destructor finalizes the first string.
|
||||
if(U_FAILURE(errorCode)) {
|
||||
// Restore the modified suffix of the first string.
|
||||
first.replace(firstLength-safeMiddle.length(), 0x7fffffff, safeMiddle);
|
||||
}
|
||||
return first;
|
||||
}
|
||||
virtual void
|
||||
normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
|
||||
UnicodeString &safeMiddle,
|
||||
ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
|
||||
virtual UBool
|
||||
getDecomposition(UChar32 c, UnicodeString &decomposition) const {
|
||||
UChar buffer[4];
|
||||
int32_t length;
|
||||
const UChar *d=impl.getDecomposition(c, buffer, length);
|
||||
if(d==NULL) {
|
||||
return FALSE;
|
||||
}
|
||||
if(d==buffer) {
|
||||
decomposition.setTo(buffer, length); // copy the string (Jamos from Hangul syllable c)
|
||||
} else {
|
||||
decomposition.setTo(FALSE, d, length); // read-only alias
|
||||
}
|
||||
return TRUE;
|
||||
}
|
||||
virtual UBool
|
||||
getRawDecomposition(UChar32 c, UnicodeString &decomposition) const {
|
||||
UChar buffer[30];
|
||||
int32_t length;
|
||||
const UChar *d=impl.getRawDecomposition(c, buffer, length);
|
||||
if(d==NULL) {
|
||||
return FALSE;
|
||||
}
|
||||
if(d==buffer) {
|
||||
decomposition.setTo(buffer, length); // copy the string (algorithmic decomposition)
|
||||
} else {
|
||||
decomposition.setTo(FALSE, d, length); // read-only alias
|
||||
}
|
||||
return TRUE;
|
||||
}
|
||||
virtual UChar32
|
||||
composePair(UChar32 a, UChar32 b) const {
|
||||
return impl.composePair(a, b);
|
||||
}
|
||||
|
||||
virtual uint8_t
|
||||
getCombiningClass(UChar32 c) const {
|
||||
return impl.getCC(impl.getNorm16(c));
|
||||
}
|
||||
|
||||
// quick checks
|
||||
virtual UBool
|
||||
isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return FALSE;
|
||||
}
|
||||
const UChar *sArray=s.getBuffer();
|
||||
if(sArray==NULL) {
|
||||
errorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return FALSE;
|
||||
}
|
||||
const UChar *sLimit=sArray+s.length();
|
||||
return sLimit==spanQuickCheckYes(sArray, sLimit, errorCode);
|
||||
}
|
||||
virtual UNormalizationCheckResult
|
||||
quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
|
||||
return Normalizer2WithImpl::isNormalized(s, errorCode) ? UNORM_YES : UNORM_NO;
|
||||
}
|
||||
virtual int32_t
|
||||
spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const {
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return 0;
|
||||
}
|
||||
const UChar *sArray=s.getBuffer();
|
||||
if(sArray==NULL) {
|
||||
errorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return 0;
|
||||
}
|
||||
return (int32_t)(spanQuickCheckYes(sArray, sArray+s.length(), errorCode)-sArray);
|
||||
}
|
||||
virtual const UChar *
|
||||
spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const = 0;
|
||||
|
||||
virtual UNormalizationCheckResult getQuickCheck(UChar32) const {
|
||||
return UNORM_YES;
|
||||
}
|
||||
|
||||
const Normalizer2Impl &impl;
|
||||
};
|
||||
|
||||
class DecomposeNormalizer2 : public Normalizer2WithImpl {
|
||||
public:
|
||||
DecomposeNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
|
||||
virtual ~DecomposeNormalizer2();
|
||||
|
||||
private:
|
||||
virtual void
|
||||
normalize(const UChar *src, const UChar *limit,
|
||||
ReorderingBuffer &buffer, UErrorCode &errorCode) const {
|
||||
impl.decompose(src, limit, &buffer, errorCode);
|
||||
}
|
||||
using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function.
|
||||
virtual void
|
||||
normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
|
||||
UnicodeString &safeMiddle,
|
||||
ReorderingBuffer &buffer, UErrorCode &errorCode) const {
|
||||
impl.decomposeAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
|
||||
}
|
||||
virtual const UChar *
|
||||
spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
|
||||
return impl.decompose(src, limit, NULL, errorCode);
|
||||
}
|
||||
using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function.
|
||||
virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
|
||||
return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO;
|
||||
}
|
||||
virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasDecompBoundary(c, TRUE); }
|
||||
virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasDecompBoundary(c, FALSE); }
|
||||
virtual UBool isInert(UChar32 c) const { return impl.isDecompInert(c); }
|
||||
};
|
||||
|
||||
class ComposeNormalizer2 : public Normalizer2WithImpl {
|
||||
public:
|
||||
ComposeNormalizer2(const Normalizer2Impl &ni, UBool fcc) :
|
||||
Normalizer2WithImpl(ni), onlyContiguous(fcc) {}
|
||||
virtual ~ComposeNormalizer2();
|
||||
|
||||
private:
|
||||
virtual void
|
||||
normalize(const UChar *src, const UChar *limit,
|
||||
ReorderingBuffer &buffer, UErrorCode &errorCode) const {
|
||||
impl.compose(src, limit, onlyContiguous, TRUE, buffer, errorCode);
|
||||
}
|
||||
using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function.
|
||||
virtual void
|
||||
normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
|
||||
UnicodeString &safeMiddle,
|
||||
ReorderingBuffer &buffer, UErrorCode &errorCode) const {
|
||||
impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, safeMiddle, buffer, errorCode);
|
||||
}
|
||||
|
||||
virtual UBool
|
||||
isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return FALSE;
|
||||
}
|
||||
const UChar *sArray=s.getBuffer();
|
||||
if(sArray==NULL) {
|
||||
errorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return FALSE;
|
||||
}
|
||||
UnicodeString temp;
|
||||
ReorderingBuffer buffer(impl, temp);
|
||||
if(!buffer.init(5, errorCode)) { // small destCapacity for substring normalization
|
||||
return FALSE;
|
||||
}
|
||||
return impl.compose(sArray, sArray+s.length(), onlyContiguous, FALSE, buffer, errorCode);
|
||||
}
|
||||
virtual UNormalizationCheckResult
|
||||
quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return UNORM_MAYBE;
|
||||
}
|
||||
const UChar *sArray=s.getBuffer();
|
||||
if(sArray==NULL) {
|
||||
errorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return UNORM_MAYBE;
|
||||
}
|
||||
UNormalizationCheckResult qcResult=UNORM_YES;
|
||||
impl.composeQuickCheck(sArray, sArray+s.length(), onlyContiguous, &qcResult);
|
||||
return qcResult;
|
||||
}
|
||||
virtual const UChar *
|
||||
spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &) const {
|
||||
return impl.composeQuickCheck(src, limit, onlyContiguous, NULL);
|
||||
}
|
||||
using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function.
|
||||
virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
|
||||
return impl.getCompQuickCheck(impl.getNorm16(c));
|
||||
}
|
||||
virtual UBool hasBoundaryBefore(UChar32 c) const {
|
||||
return impl.hasCompBoundaryBefore(c);
|
||||
}
|
||||
virtual UBool hasBoundaryAfter(UChar32 c) const {
|
||||
return impl.hasCompBoundaryAfter(c, onlyContiguous, FALSE);
|
||||
}
|
||||
virtual UBool isInert(UChar32 c) const {
|
||||
return impl.hasCompBoundaryAfter(c, onlyContiguous, TRUE);
|
||||
}
|
||||
|
||||
const UBool onlyContiguous;
|
||||
};
|
||||
|
||||
class FCDNormalizer2 : public Normalizer2WithImpl {
|
||||
public:
|
||||
FCDNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
|
||||
virtual ~FCDNormalizer2();
|
||||
|
||||
private:
|
||||
virtual void
|
||||
normalize(const UChar *src, const UChar *limit,
|
||||
ReorderingBuffer &buffer, UErrorCode &errorCode) const {
|
||||
impl.makeFCD(src, limit, &buffer, errorCode);
|
||||
}
|
||||
using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function.
|
||||
virtual void
|
||||
normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
|
||||
UnicodeString &safeMiddle,
|
||||
ReorderingBuffer &buffer, UErrorCode &errorCode) const {
|
||||
impl.makeFCDAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
|
||||
}
|
||||
virtual const UChar *
|
||||
spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
|
||||
return impl.makeFCD(src, limit, NULL, errorCode);
|
||||
}
|
||||
using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function.
|
||||
virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasFCDBoundaryBefore(c); }
|
||||
virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasFCDBoundaryAfter(c); }
|
||||
virtual UBool isInert(UChar32 c) const { return impl.isFCDInert(c); }
|
||||
};
|
||||
|
||||
struct Norm2AllModes : public UMemory {
|
||||
Norm2AllModes(Normalizer2Impl *i)
|
||||
: impl(i), comp(*i, FALSE), decomp(*i), fcd(*i), fcc(*i, TRUE) {}
|
||||
~Norm2AllModes();
|
||||
|
||||
static Norm2AllModes *createInstance(Normalizer2Impl *impl, UErrorCode &errorCode);
|
||||
static Norm2AllModes *createNFCInstance(UErrorCode &errorCode);
|
||||
static Norm2AllModes *createInstance(const char *packageName,
|
||||
const char *name,
|
||||
UErrorCode &errorCode);
|
||||
|
||||
static const Norm2AllModes *getNFCInstance(UErrorCode &errorCode);
|
||||
static const Norm2AllModes *getNFKCInstance(UErrorCode &errorCode);
|
||||
static const Norm2AllModes *getNFKC_CFInstance(UErrorCode &errorCode);
|
||||
|
||||
Normalizer2Impl *impl;
|
||||
ComposeNormalizer2 comp;
|
||||
DecomposeNormalizer2 decomp;
|
||||
FCDNormalizer2 fcd;
|
||||
ComposeNormalizer2 fcc;
|
||||
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif // !UCONFIG_NO_NORMALIZATION
|
||||
#endif // __NORM2ALLMODES_H__
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user