From 7569534cfd9754c2a829df29b466e9d6e5f3a285 Mon Sep 17 00:00:00 2001 From: Kulikova Svetlana Date: Mon, 4 Oct 2021 18:11:05 +0300 Subject: [PATCH] c++ fix merge with develop --- .../graphics/pro/js/qt/nativegraphics.pro | 136 +- DjVuFile/DjVuFile.pro | 4 +- DjVuFile/libdjvu/BSEncodeByteStream.cpp | 2 +- DjVuFile/libdjvu/DataPool.cpp | 2 +- DjVuFile/libdjvu/DataPool.h | 2 +- DjVuFile/libdjvu/DjVmDir.h | 2 +- DjVuFile/libdjvu/DjVmDir0.h | 2 +- DjVuFile/libdjvu/DjVmNav.h | 2 +- DjVuFile/libdjvu/DjVuAnno.h | 2 +- .../libdjvu/{GString.cpp => DjVuGString.cpp} | 5504 ++++++++--------- DjVuFile/libdjvu/{GString.h => DjVuGString.h} | 3354 +++++----- DjVuFile/libdjvu/DjVuInfo.cpp | 2 +- DjVuFile/libdjvu/DjVuMessageLite.h | 2 +- DjVuFile/libdjvu/DjVuNavDir.h | 2 +- DjVuFile/libdjvu/GBitmap.cpp | 2 +- DjVuFile/libdjvu/GIFFManager.h | 2 +- DjVuFile/libdjvu/GMapAreas.h | 2 +- DjVuFile/libdjvu/GOS.h | 2 +- DjVuFile/libdjvu/GPixmap.cpp | 2 +- DjVuFile/libdjvu/GURL.h | 2 +- DjVuFile/libdjvu/GUnicode.cpp | 2 +- DjVuFile/libdjvu/IFFByteStream.h | 2 +- DjVuFile/libdjvu/JB2Image.h | 2 +- DjVuFile/libdjvu/JPEGDecoder.cpp | 2 +- DjVuFile/libdjvu/UnicodeByteStream.h | 2 +- DjVuFile/libdjvu/XMLTags.h | 2 +- DjVuFile/libdjvu/debug.cpp | 4 +- PdfReader/PdfReader.cpp | 20 +- PdfReader/PdfReader.h | 8 + PdfReader/Src/RendererOutputDev.cpp | 50 +- PdfReader/lib/xpdf/PDFDoc.cc | 17 + PdfReader/lib/xpdf/PDFDoc.h | 2 + 32 files changed, 4583 insertions(+), 4560 deletions(-) rename DjVuFile/libdjvu/{GString.cpp => DjVuGString.cpp} (95%) rename DjVuFile/libdjvu/{GString.h => DjVuGString.h} (97%) diff --git a/DesktopEditor/graphics/pro/js/qt/nativegraphics.pro b/DesktopEditor/graphics/pro/js/qt/nativegraphics.pro index 36bd5fd299..2ff7a435f2 100644 --- a/DesktopEditor/graphics/pro/js/qt/nativegraphics.pro +++ b/DesktopEditor/graphics/pro/js/qt/nativegraphics.pro @@ -39,7 +39,8 @@ DEFINES -= _UNICODE LIBS += -lgdi32 \ -ladvapi32 \ -luser32 \ - -lshell32 + -lshell32 \ + -lOle32 } # graphics @@ -514,7 
+515,7 @@ HEADERS += \ $$DJVU_ROOT_DIR/libdjvu/GRect.h \ $$DJVU_ROOT_DIR/libdjvu/GScaler.h \ $$DJVU_ROOT_DIR/libdjvu/GSmartPointer.h \ - $$DJVU_ROOT_DIR/libdjvu/GString.h \ + $$DJVU_ROOT_DIR/libdjvu/DjVuGString.h \ $$DJVU_ROOT_DIR/libdjvu/GThreads.h \ $$DJVU_ROOT_DIR/libdjvu/GURL.h \ $$DJVU_ROOT_DIR/libdjvu/IFFByteStream.h \ @@ -570,7 +571,7 @@ SOURCES += \ $$DJVU_ROOT_DIR/libdjvu/GRect.cpp \ $$DJVU_ROOT_DIR/libdjvu/GScaler.cpp \ $$DJVU_ROOT_DIR/libdjvu/GSmartPointer.cpp \ - $$DJVU_ROOT_DIR/libdjvu/GString.cpp \ + $$DJVU_ROOT_DIR/libdjvu/DjVuGString.cpp \ $$DJVU_ROOT_DIR/libdjvu/GThreads.cpp \ $$DJVU_ROOT_DIR/libdjvu/GUnicode.cpp \ #$$DJVU_ROOT_DIR/libdjvu/GURL.cpp \ @@ -599,6 +600,37 @@ DEFINES += CRYPTOPP_DISABLE_ASM LIBS += -L$$CORE_BUILDS_LIBRARIES_PATH -lCryptoPPLib PDF_ROOT_DIR = $$PWD/../../../../../PdfReader +INCLUDEPATH += \ + $$PDF_ROOT_DIR/lib/goo \ + $$PDF_ROOT_DIR/lib/fofi \ + $$PDF_ROOT_DIR/lib/splash \ + $$PDF_ROOT_DIR/lib + +HEADERS += \ + $$PDF_ROOT_DIR/lib/aconf.h \ + $$$files($$PDF_ROOT_DIR/lib/*.h) + +SOURCES += $$files($$PDF_ROOT_DIR/lib/*.c, true) +SOURCES += $$files($$PDF_ROOT_DIR/lib/*.cc, true) + +SOURCES -= \ + $$PDF_ROOT_DIR/lib/xpdf/HTMLGen.cc \ + $$PDF_ROOT_DIR/lib/xpdf/pdftohtml.cc \ + $$PDF_ROOT_DIR/lib/xpdf/pdftopng.cc \ + $$PDF_ROOT_DIR/lib/xpdf/pdftoppm.cc \ + $$PDF_ROOT_DIR/lib/xpdf/pdftops.cc \ + $$PDF_ROOT_DIR/lib/xpdf/pdftotext.cc \ + $$PDF_ROOT_DIR/lib/xpdf/pdfdetach.cc \ + $$PDF_ROOT_DIR/lib/xpdf/pdffonts.cc \ + $$PDF_ROOT_DIR/lib/xpdf/pdfimages.cc \ + $$PDF_ROOT_DIR/lib/xpdf/pdfinfo.cc + +SOURCES += \ + $$PDF_ROOT_DIR/Src/RendererOutputDev.cpp \ + $$PDF_ROOT_DIR/Src/Adaptors.cpp \ + $$PDF_ROOT_DIR/Src/GfxClip.cpp \ + $$PDF_ROOT_DIR/PdfReader.cpp + HEADERS +=\ $$PDF_ROOT_DIR/Resources/Fontd050000l.h \ $$PDF_ROOT_DIR/Resources/Fontn019003l.h \ @@ -614,103 +646,11 @@ HEADERS +=\ $$PDF_ROOT_DIR/Resources/Fontn022023l.h \ $$PDF_ROOT_DIR/Resources/Fontn022024l.h \ $$PDF_ROOT_DIR/Resources/Fonts050000l.h \ - 
$$PDF_ROOT_DIR/Src/Annot.h \ - $$PDF_ROOT_DIR/Src/Array.h \ - $$PDF_ROOT_DIR/Src/BuiltinFont.h \ - $$PDF_ROOT_DIR/Src/BuiltinFontTables.h \ - $$PDF_ROOT_DIR/Src/Catalog.h \ - $$PDF_ROOT_DIR/Src/CCITT-Tables.h \ - $$PDF_ROOT_DIR/Src/CharCodeToUnicode.h \ - $$PDF_ROOT_DIR/Src/CharTypes.h \ - $$PDF_ROOT_DIR/Src/CMap.h \ - $$PDF_ROOT_DIR/Src/Constants.h \ - $$PDF_ROOT_DIR/Src/Decrypt.h \ - $$PDF_ROOT_DIR/Src/Dict.h \ - $$PDF_ROOT_DIR/Src/EncodingTables.h \ - $$PDF_ROOT_DIR/Src/ErrorConstants.h \ - $$PDF_ROOT_DIR/Src/ExtractImageOutputDev.h \ - $$PDF_ROOT_DIR/Src/File.h \ - $$PDF_ROOT_DIR/Src/FontFileBase.h \ - $$PDF_ROOT_DIR/Src/FontFileEncodings.h \ - $$PDF_ROOT_DIR/Src/FontFileTrueType.h \ - $$PDF_ROOT_DIR/Src/FontFileType1.h \ - $$PDF_ROOT_DIR/Src/FontFileType1C.h \ - $$PDF_ROOT_DIR/Src/Function.h \ - $$PDF_ROOT_DIR/Src/GFont.h \ - $$PDF_ROOT_DIR/Src/GlobalParams.h \ - $$PDF_ROOT_DIR/Src/Graphics.h \ - $$PDF_ROOT_DIR/Src/GState.h \ - $$PDF_ROOT_DIR/Src/Hash.h \ - $$PDF_ROOT_DIR/Src/JArithmeticDecoder.h \ - $$PDF_ROOT_DIR/Src/JBIG2Stream.h \ - $$PDF_ROOT_DIR/Src/JPXStream.h \ - $$PDF_ROOT_DIR/Src/Lexer.h \ - $$PDF_ROOT_DIR/Src/Link.h \ - $$PDF_ROOT_DIR/Src/List.h \ - $$PDF_ROOT_DIR/Src/MemoryUtils.h \ - $$PDF_ROOT_DIR/Src/NameToCharCode.h \ - $$PDF_ROOT_DIR/Src/NameToUnicodeTable.h \ - $$PDF_ROOT_DIR/Src/Object.h \ - $$PDF_ROOT_DIR/Src/Outline.h \ - $$PDF_ROOT_DIR/Src/OutputDevice.h \ - $$PDF_ROOT_DIR/Src/Page.h \ - $$PDF_ROOT_DIR/Src/PageLabels.h \ - $$PDF_ROOT_DIR/Src/Parser.h \ - $$PDF_ROOT_DIR/Src/PDFDoc.h \ - $$PDF_ROOT_DIR/Src/PDFDocEncoding.h \ - $$PDF_ROOT_DIR/Src/PSLexer.h \ $$PDF_ROOT_DIR/Src/RendererOutputDev.h \ - $$PDF_ROOT_DIR/Src/SecurityHandler.h \ - $$PDF_ROOT_DIR/Src/Stream.h \ - $$PDF_ROOT_DIR/Src/StringExt.h \ - $$PDF_ROOT_DIR/Src/UnicodeMap.h \ - $$PDF_ROOT_DIR/Src/UnicodeMapTables.h \ - $$PDF_ROOT_DIR/Src/UTF8.h \ - $$PDF_ROOT_DIR/Src/XmlUtils.h \ - $$PDF_ROOT_DIR/Src/XRef.h \ + $$PDF_ROOT_DIR/Src/Adaptors.h \ + $$PDF_ROOT_DIR/Src/MemoryUtils.h 
\ + $$PDF_ROOT_DIR/Src/GfxClip.h \ $$PDF_ROOT_DIR/PdfReader.h -SOURCES += \ - $$PDF_ROOT_DIR/Src/Annot.cpp \ - $$PDF_ROOT_DIR/Src/Array.cpp \ - $$PDF_ROOT_DIR/Src/Catalog.cpp \ - $$PDF_ROOT_DIR/Src/CharCodeToUnicode.cpp \ - $$PDF_ROOT_DIR/Src/CMap.cpp \ - $$PDF_ROOT_DIR/Src/Decrypt.cpp \ - $$PDF_ROOT_DIR/Src/Dict.cpp \ - $$PDF_ROOT_DIR/Src/ExtractImageOutputDev.cpp \ - $$PDF_ROOT_DIR/Src/FontFileBase.cpp \ - $$PDF_ROOT_DIR/Src/FontFileTrueType.cpp \ - $$PDF_ROOT_DIR/Src/FontFileType1.cpp \ - $$PDF_ROOT_DIR/Src/FontFileType1C.cpp \ - $$PDF_ROOT_DIR/Src/Function.cpp \ - $$PDF_ROOT_DIR/Src/GFont.cpp \ - $$PDF_ROOT_DIR/Src/GlobalParams.cpp \ - $$PDF_ROOT_DIR/Src/Graphics.cpp \ - $$PDF_ROOT_DIR/Src/GState.cpp \ - $$PDF_ROOT_DIR/Src/Hash.cpp \ - $$PDF_ROOT_DIR/Src/JArithmeticDecoder.cpp \ - $$PDF_ROOT_DIR/Src/JBIG2Stream.cpp \ - $$PDF_ROOT_DIR/Src/JPXStream.cpp \ - $$PDF_ROOT_DIR/Src/Lexer.cpp \ - $$PDF_ROOT_DIR/Src/Link.cpp \ - $$PDF_ROOT_DIR/Src/List.cpp \ - $$PDF_ROOT_DIR/Src/NameToCharCode.cpp \ - $$PDF_ROOT_DIR/Src/Object.cpp \ - $$PDF_ROOT_DIR/Src/Outline.cpp \ - $$PDF_ROOT_DIR/Src/OutputDevice.cpp \ - $$PDF_ROOT_DIR/Src/Page.cpp \ - $$PDF_ROOT_DIR/Src/PageLabels.cpp \ - $$PDF_ROOT_DIR/Src/Parser.cpp \ - $$PDF_ROOT_DIR/Src/PDFDoc.cpp \ - $$PDF_ROOT_DIR/Src/PSLexer.cpp \ - $$PDF_ROOT_DIR/Src/RendererOutputDev.cpp \ - $$PDF_ROOT_DIR/Src/SecurityHandler.cpp \ - $$PDF_ROOT_DIR/Src/Stream.cpp \ - $$PDF_ROOT_DIR/Src/StringExt.cpp \ - $$PDF_ROOT_DIR/Src/UnicodeMap.cpp \ - $$PDF_ROOT_DIR/Src/XRef.cpp \ - $$PDF_ROOT_DIR/PdfReader.cpp - HEADERS += ../wasm/src/drawingfile.h SOURCES += ../wasm/src/drawingfile.cpp diff --git a/DjVuFile/DjVuFile.pro b/DjVuFile/DjVuFile.pro index 240be44188..b5248cd172 100644 --- a/DjVuFile/DjVuFile.pro +++ b/DjVuFile/DjVuFile.pro @@ -83,7 +83,7 @@ SOURCES += DjVu.cpp \ libdjvu/GRect.cpp \ libdjvu/GScaler.cpp \ libdjvu/GSmartPointer.cpp \ - libdjvu/GString.cpp \ + libdjvu/DjVuGString.cpp \ libdjvu/GThreads.cpp \ libdjvu/GUnicode.cpp \ 
libdjvu/GURL.cpp \ @@ -139,7 +139,7 @@ HEADERS += DjVu.h \ libdjvu/GRect.h \ libdjvu/GScaler.h \ libdjvu/GSmartPointer.h \ - libdjvu/GString.h \ + libdjvu/DjVuGString.h \ libdjvu/GThreads.h \ libdjvu/GURL.h \ libdjvu/IFFByteStream.h \ diff --git a/DjVuFile/libdjvu/BSEncodeByteStream.cpp b/DjVuFile/libdjvu/BSEncodeByteStream.cpp index 730287c1d8..4f2f0df27a 100644 --- a/DjVuFile/libdjvu/BSEncodeByteStream.cpp +++ b/DjVuFile/libdjvu/BSEncodeByteStream.cpp @@ -68,7 +68,7 @@ #include "BSByteStream.h" -#include "GString.h" +#include "DjVuGString.h" #undef BSORT_TIMER #ifdef BSORT_TIMER #include "GOS.h" diff --git a/DjVuFile/libdjvu/DataPool.cpp b/DjVuFile/libdjvu/DataPool.cpp index fe17cfa5f6..91f334a96a 100644 --- a/DjVuFile/libdjvu/DataPool.cpp +++ b/DjVuFile/libdjvu/DataPool.cpp @@ -65,7 +65,7 @@ #include "DataPool.h" #include "IFFByteStream.h" -#include "GString.h" +#include "DjVuGString.h" #include "GOS.h" #include "GURL.h" #include "debug.h" diff --git a/DjVuFile/libdjvu/DataPool.h b/DjVuFile/libdjvu/DataPool.h index 8e966b0477..445bfe0f6d 100644 --- a/DjVuFile/libdjvu/DataPool.h +++ b/DjVuFile/libdjvu/DataPool.h @@ -67,7 +67,7 @@ #include "GThreads.h" -#include "GString.h" +#include "DjVuGString.h" #include "GURL.h" #ifdef HAVE_NAMESPACES diff --git a/DjVuFile/libdjvu/DjVmDir.h b/DjVuFile/libdjvu/DjVmDir.h index b647290226..cbf5202270 100644 --- a/DjVuFile/libdjvu/DjVmDir.h +++ b/DjVuFile/libdjvu/DjVmDir.h @@ -118,7 +118,7 @@ -#include "GString.h" +#include "DjVuGString.h" #include "GThreads.h" #ifdef HAVE_NAMESPACES diff --git a/DjVuFile/libdjvu/DjVmDir0.h b/DjVuFile/libdjvu/DjVmDir0.h index 2944c0ac12..5e135bff15 100644 --- a/DjVuFile/libdjvu/DjVmDir0.h +++ b/DjVuFile/libdjvu/DjVmDir0.h @@ -66,7 +66,7 @@ #endif -#include "GString.h" +#include "DjVuGString.h" #ifdef HAVE_NAMESPACES namespace DJVU { diff --git a/DjVuFile/libdjvu/DjVmNav.h b/DjVuFile/libdjvu/DjVmNav.h index 8ed1332ce4..9e940e69fa 100644 --- a/DjVuFile/libdjvu/DjVmNav.h +++ 
b/DjVuFile/libdjvu/DjVmNav.h @@ -66,7 +66,7 @@ #endif #include "DjVuGlobal.h" -#include "GString.h" +#include "DjVuGString.h" #include "GThreads.h" #ifdef HAVE_NAMESPACES diff --git a/DjVuFile/libdjvu/DjVuAnno.h b/DjVuFile/libdjvu/DjVuAnno.h index 035d2b81b2..684c86e678 100644 --- a/DjVuFile/libdjvu/DjVuAnno.h +++ b/DjVuFile/libdjvu/DjVuAnno.h @@ -91,7 +91,7 @@ //@{ -#include "GString.h" +#include "DjVuGString.h" #ifdef HAVE_NAMESPACES namespace DJVU { diff --git a/DjVuFile/libdjvu/GString.cpp b/DjVuFile/libdjvu/DjVuGString.cpp similarity index 95% rename from DjVuFile/libdjvu/GString.cpp rename to DjVuFile/libdjvu/DjVuGString.cpp index d1f68f187a..acaabffa2e 100644 --- a/DjVuFile/libdjvu/GString.cpp +++ b/DjVuFile/libdjvu/DjVuGString.cpp @@ -1,2752 +1,2752 @@ -//C- -*- C++ -*- -//C- ------------------------------------------------------------------- -//C- DjVuLibre-3.5 -//C- Copyright (c) 2002 Leon Bottou and Yann Le Cun. -//C- Copyright (c) 2001 AT&T -//C- -//C- This software is subject to, and may be distributed under, the -//C- GNU General Public License, either Version 2 of the license, -//C- or (at your option) any later version. The license should have -//C- accompanied the software or you may obtain a copy of the license -//C- from the Free Software Foundation at http://www.fsf.org . -//C- -//C- This program is distributed in the hope that it will be useful, -//C- but WITHOUT ANY WARRANTY; without even the implied warranty of -//C- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -//C- GNU General Public License for more details. -//C- -//C- DjVuLibre-3.5 is derived from the DjVu(r) Reference Library from -//C- Lizardtech Software. Lizardtech Software has authorized us to -//C- replace the original DjVu(r) Reference Library notice by the following -//C- text (see doc/lizard2002.djvu and doc/lizardtech2007.djvu): -//C- -//C- ------------------------------------------------------------------ -//C- | DjVu (r) Reference Library (v. 
3.5) -//C- | Copyright (c) 1999-2001 LizardTech, Inc. All Rights Reserved. -//C- | The DjVu Reference Library is protected by U.S. Pat. No. -//C- | 6,058,214 and patents pending. -//C- | -//C- | This software is subject to, and may be distributed under, the -//C- | GNU General Public License, either Version 2 of the license, -//C- | or (at your option) any later version. The license should have -//C- | accompanied the software or you may obtain a copy of the license -//C- | from the Free Software Foundation at http://www.fsf.org . -//C- | -//C- | The computer code originally released by LizardTech under this -//C- | license and unmodified by other parties is deemed "the LIZARDTECH -//C- | ORIGINAL CODE." Subject to any third party intellectual property -//C- | claims, LizardTech grants recipient a worldwide, royalty-free, -//C- | non-exclusive license to make, use, sell, or otherwise dispose of -//C- | the LIZARDTECH ORIGINAL CODE or of programs derived from the -//C- | LIZARDTECH ORIGINAL CODE in compliance with the terms of the GNU -//C- | General Public License. This grant only confers the right to -//C- | infringe patent claims underlying the LIZARDTECH ORIGINAL CODE to -//C- | the extent such infringement is reasonably necessary to enable -//C- | recipient to make, have made, practice, sell, or otherwise dispose -//C- | of the LIZARDTECH ORIGINAL CODE (or portions thereof) and not to -//C- | any greater extent that may be necessary to utilize further -//C- | modifications or combinations. -//C- | -//C- | The LIZARDTECH ORIGINAL CODE is provided "AS IS" WITHOUT WARRANTY -//C- | OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -//C- | TO ANY WARRANTY OF NON-INFRINGEMENT, OR ANY IMPLIED WARRANTY OF -//C- | MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. 
-//C- +------------------------------------------------------------------ -// -// $Id: GString.cpp,v 1.26 2008/03/10 20:42:33 leonb Exp $ -// $Name: $ - -// From: Leon Bottou, 1/31/2002 -// This file has very little to do with my initial implementation. -// It has been practically rewritten by Lizardtech for i18n changes. -// My original implementation was very small in comparison -// . -// In my opinion, the duplication of the string classes is a failed -// attempt to use the type system to enforce coding policies. -// This could be fixed. But there are better things to do in djvulibre. - -#ifdef HAVE_CONFIG_H -# include "config.h" -#endif -#if NEED_GNUG_PRAGMAS -# pragma implementation -#endif - -#include "GString.h" -#include "GThreads.h" -#include "debug.h" - -#include -#include -#include -#if HAS_WCHAR -# include -# if !defined(AUTOCONF) || HAVE_WCHAR_H -# include -# endif -# if HAS_WCTYPE -# include -# endif -#endif -#include - -#ifndef DO_CHANGELOCALE -#define DO_CHANGELOCALE 1 -#ifdef UNIX -#if THREADMODEL != COTHREADS -#if THREADMODEL != NOTHREADS -#undef DO_CHANGELOCALE -#define DO_CHANGELOCALE 0 -#endif -#endif -#endif -#endif - - -#ifdef HAVE_NAMESPACES -namespace DJVU { -# ifdef NOT_DEFINED // Just to fool emacs c++ mode -} -#endif -#endif - - -GBaseString::~GBaseString() {} -GNativeString::~GNativeString() {} -GUTF8String::~GUTF8String() {} - -#if !HAS_MBSTATE && HAS_WCHAR -// Under some systems, wctomb() and mbtowc() are not thread -// safe. In those cases, wcrtomb and mbrtowc are preferred. -// For Solaris, wctomb() and mbtowc() are thread safe, and -// wcrtomb() and mbrtowc() don't exist. 
- -#define wcrtomb MYwcrtomb -#define mbrtowc MYmbrtowc -#define mbrlen MYmbrlen - -static inline int -wcrtomb(char *bytes,wchar_t w,mbstate_t *) -{ - return wctomb(bytes,w); -} - -static inline int -mbrtowc(wchar_t *w,const char *source, size_t n, mbstate_t *) -{ - return mbtowc(w,source,n); -} - -static inline size_t -mbrlen(const char *s, size_t n, mbstate_t *) -{ - return mblen(s,n); -} -#endif // !HAS_MBSTATE || HAS_WCHAR - - -GP -GStringRep::upcase(void) const -{ return tocase(giswupper,gtowupper); } - -GP -GStringRep::downcase(void) const -{ return tocase(giswlower,gtowlower); } - -GP -GStringRep::UTF8::create(const unsigned int sz) -{ - return GStringRep::create(sz,(GStringRep::UTF8 *)0); -} - -GP -GStringRep::UTF8::create(const char *s) -{ - GStringRep::UTF8 dummy; - return dummy.strdup(s); -} - -GP -GStringRep::UTF8::create(const GP &s1,const GP &s2) -{ - GStringRep::UTF8 dummy; - return dummy.concat(s1,s2); -} - -GP -GStringRep::UTF8::create( const GP &s1,const char *s2) -{ - GStringRep::UTF8 dummy; - return dummy.concat(s1,s2); -} - -GP -GStringRep::UTF8::create( const char *s1, const GP &s2) -{ - GStringRep::UTF8 dummy; - return dummy.concat(s1,s2); -} - -GP -GStringRep::UTF8::create( const char *s1,const char *s2) -{ - GStringRep::UTF8 dummy; - return dummy.concat(s1,s2); -} - -GP -GStringRep::UTF8::create(const char *s,const int start,const int length) -{ - GStringRep::UTF8 dummy; - return dummy.substr(s,start,length); -} - -GP -GStringRep::UTF8::create( - const unsigned short *s,const int start,const int length) -{ - GStringRep::UTF8 dummy; - return dummy.substr(s,start,length); -} - -GP -GStringRep::UTF8::create( - const unsigned long *s,const int start,const int length) -{ - GStringRep::UTF8 dummy; - return dummy.substr(s,start,length); -} - -GP -GStringRep::UTF8::blank(const unsigned int sz) const -{ - return GStringRep::create(sz,(GStringRep::UTF8 *)0); -} - -bool -GStringRep::UTF8::isUTF8(void) const -{ - return true; -} - -GP 
-GStringRep::UTF8::toThis( - const GP &rep,const GP &) const -{ - return rep?(rep->toUTF8(true)):rep; -} - -GP -GStringRep::UTF8::create(const char fmt[],va_list& args) -{ - const GP s(create(fmt)); - return (s?(s->vformat(args)):s); -} - -#if !HAS_WCHAR - -#define NATIVE_CREATE(x) UTF8::create( x ); - -#ifdef LC_ALL -#undef LC_ALL -#endif -#define LC_ALL 0 - -class GStringRep::ChangeLocale -{ -public: - ChangeLocale(const int,const char *) {} - ~ChangeLocale() {}; -}; - -GP -GStringRep::NativeToUTF8( const char *s ) -{ - return GStringRep::UTF8::create(s); -} - -#else - -#define NATIVE_CREATE(x) Native::create( x ); - -// The declaration and implementation of GStringRep::ChangeLocale -// Not used in WinCE - -class GStringRep::ChangeLocale -{ -public: - ChangeLocale(const int category,const char locale[]); - ~ChangeLocale(); -private: - GUTF8String locale; - int category; -}; - -class GStringRep::Native : public GStringRep -{ -public: - // default constructor - Native(void); - // virtual destructor - virtual ~Native(); - - // Other virtual methods. - // Create an empty string. - virtual GP blank(const unsigned int sz = 0) const; - // Append a string. - virtual GP append(const GP &s2) const; - // Test if Native. - virtual bool isNative(void) const; - // Convert to Native. - virtual GP toNative( - const EscapeMode escape=UNKNOWN_ESCAPED) const; - // Convert to UTF8. - virtual GP toUTF8(const bool nothrow=false) const; - // Convert to UTF8. - virtual GP toThis( - const GP &rep,const GP &) const; - // Compare with #s2#. - virtual int cmp(const GP &s2, const int len=(-1)) const; - - // Convert strings to numbers. 
- virtual int toInt(void) const; - virtual long toLong( - const int pos, int &endpos, const int base=10) const; - virtual unsigned long toULong( - const int pos, int &endpos, const int base=10) const; - virtual double toDouble( - const int pos, int &endpos) const; - - // Create an empty string - static GP create(const unsigned int sz = 0); - - // Create a strdup string. - static GP create(const char *s); - - // Creates by appending to the current string - - // Creates with a concat operation. - static GP create( - const GP &s1,const GP &s2); - static GP create( const GP &s1,const char *s2); - static GP create( const char *s1, const GP &s2); - static GP create(const char *s1,const char *s2); - - // Create with a strdup and substr operation. - static GP create( - const char *s,const int start,const int length=(-1)); - static GP create( - const unsigned short *s,const int start,const int length=(-1)); - static GP create( - const unsigned long *s,const int start,const int length=(-1)); - - // Create with an sprintf() - static GP create_format(const char fmt[],...); - static GP create(const char fmt[],va_list &args); - - virtual unsigned char *UCS4toString( - const unsigned long w,unsigned char *ptr, mbstate_t *ps=0) const; - - // Tests if a string is legally encoded in the current character set. - virtual bool is_valid(void) const; - - virtual int ncopy(wchar_t * const buf, const int buflen) const; - - friend class GBaseString; -protected: - // Return the next character and increment the source pointer. - virtual unsigned long getValidUCS4(const char *&source) const; -}; - -GP -GStringRep::Native::create(const unsigned int sz) -{ - return GStringRep::create(sz,(GStringRep::Native *)0); -} - - // Create a strdup string. 
-GP -GStringRep::Native::create(const char *s) -{ - GStringRep::Native dummy; - return dummy.strdup(s); -} - -GP -GStringRep::Native::create(const GP &s1,const GP &s2) -{ - GStringRep::Native dummy; - return dummy.concat(s1,s2); -} - -GP -GStringRep::Native::create( const GP &s1,const char *s2) -{ - GStringRep::Native dummy; - return dummy.concat(s1,s2); -} - -GP -GStringRep::Native::create( const char *s1, const GP &s2) -{ - GStringRep::Native dummy; - return dummy.concat(s1,s2); -} - -GP -GStringRep::Native::create(const char *s1,const char *s2) -{ - GStringRep::Native dummy; - return dummy.concat(s1,s2); -} - -GP -GStringRep::Native::create( - const char *s,const int start,const int length) -{ - GStringRep::Native dummy; - return dummy.substr(s,start,length); -} - -GP -GStringRep::Native::create( - const unsigned short *s,const int start,const int length) -{ - GStringRep::Native dummy; - return dummy.substr(s,start,length); -} - -GP -GStringRep::Native::create( - const unsigned long *s,const int start,const int length) -{ - GStringRep::Native dummy; - return dummy.substr(s,start,length); -} - -GP -GStringRep::Native::blank(const unsigned int sz) const -{ - return GStringRep::create(sz,(GStringRep::Native *)0); -} - -bool -GStringRep::Native::isNative(void) const -{ - return true; -} - -GP -GStringRep::Native::toThis( - const GP &rep,const GP &) const -{ - return rep?(rep->toNative(NOT_ESCAPED)):rep; -} - -GP -GStringRep::Native::create(const char fmt[],va_list &args) -{ - const GP s(create(fmt)); - return (s?(s->vformat(args)):s); -} - -int -GStringRep::Native::ncopy( - wchar_t * const buf, const int buflen ) const -{ - return toUTF8()->ncopy(buf,buflen); -} - -GStringRep::ChangeLocale::ChangeLocale(const int xcategory, const char xlocale[] ) - : category(xcategory) -{ -#if DO_CHANGELOCALE - // This is disabled under UNIX because - // it does not play nice with MT. 
- if(xlocale) - { - locale=setlocale(xcategory,0); - if(locale.length() &&(locale!=xlocale)) - { - if(locale == setlocale(category,xlocale)) - { - locale.empty(); - } - } - else - { - locale.empty(); - } - } -#endif -} - -GStringRep::ChangeLocale::~ChangeLocale() -{ -#if DO_CHANGELOCALE - if(locale.length()) - { - setlocale(category,(const char *)locale); - } -#endif -} - -GNativeString & -GNativeString::format(const char fmt[], ... ) -{ - va_list args; - va_start(args, fmt); - return init(GStringRep::Native::create(fmt,args)); -} - -// Gather the native implementations here. Not used in WinCE. - -GStringRep::Native::Native(void) {} -GStringRep::Native::~Native() {} - -GP -GStringRep::Native::append(const GP &s2) const -{ - GP retval; - if(s2) - { - if(s2->isUTF8()) - { - G_THROW( ERR_MSG("GStringRep.appendUTF8toNative") ); - } - retval=concat(data,s2->data); - }else - { - retval=const_cast(this); - } - return retval; -} - -GP -GStringRep::Native::create_format(const char fmt[],...) -{ - va_list args; - va_start(args, fmt); - return create(fmt,args); -} - -unsigned char * -GStringRep::Native::UCS4toString( - const unsigned long w0,unsigned char *ptr, mbstate_t *ps) const -{ - return UCS4toNative(w0,ptr,ps); -} - -// Convert a UCS4 to a multibyte string in the value bytes. -// The data pointed to by ptr should be long enough to contain -// the results with a nill termination. (Normally 7 characters -// is enough.) -unsigned char * -GStringRep::UCS4toNative( - const unsigned long w0,unsigned char *ptr, mbstate_t *ps) -{ - unsigned short w1; - unsigned short w2=1; - for(int count=(sizeof(wchar_t)==sizeof(w1)) ? 
UCS4toUTF16(w0,w1,w2) : 1; - count; - --count,w1=w2) - { - // wchar_t can be either UCS4 or UCS2 - const wchar_t w=(sizeof(wchar_t) == sizeof(w1))?(wchar_t)w1:(wchar_t)w0; - int i=wcrtomb((char *)ptr,w,ps); - if(i<0) - { - break; - } - ptr[i]=0; - ptr += i; - } - ptr[0]=0; - return ptr; -} - -GP -GStringRep::Native::toNative(const EscapeMode escape) const -{ - if(escape == UNKNOWN_ESCAPED) - G_THROW( ERR_MSG("GStringRep.NativeToNative") ); - return const_cast(this); -} - -GP -GStringRep::Native::toUTF8(const bool) const -{ - unsigned char *buf; - GPBuffer gbuf(buf,size*6+1); - buf[0]=0; - if(data && size) - { - size_t n=size; - const char *source=data; - mbstate_t ps; - unsigned char *ptr=buf; - //(void)mbrlen(source, n, &ps); - memset(&ps,0,sizeof(mbstate_t)); - int i=0; - if(sizeof(wchar_t) == sizeof(unsigned long)) - { - wchar_t w = 0; - for(;(n>0)&&((i=mbrtowc(&w,source,n,&ps))>=0); n-=i,source+=i) - { - ptr=UCS4toUTF8(w,ptr); - } - } - else - { - wchar_t w = 0; - for(;(n>0)&&((i=mbrtowc(&w,source,n,&ps))>=0);n-=i,source+=i) - { - unsigned short s[2]; - s[0]=w; - unsigned long w0; - if(UTF16toUCS4(w0,s,s+1)<=0) - { - source+=i; - n-=i; - if((n>0)&&((i=mbrtowc(&w,source,n,&ps))>=0)) - { - s[1]=w; - if(UTF16toUCS4(w0,s,s+2)<=0) - { - i=(-1); - break; - } - } - else - { - i=(-1); - break; - } - } - ptr=UCS4toUTF8(w0,ptr); - } - } - if(i<0) - { - gbuf.resize(0); - } - else - { - ptr[0]=0; - } - } - return GStringRep::UTF8::create((const char *)buf); -} - -GNativeString -GBaseString::UTF8ToNative( - const bool currentlocale,const EscapeMode escape) const -{ - const char *source=(*this); - GP retval; - if(source && source[0]) - { -#if DO_CHANGELOCALE - GUTF8String lc_ctype(setlocale(LC_CTYPE,0)); -#endif - bool repeat; - for(repeat=!currentlocale;;repeat=false) - { - retval=(*this)->toNative((GStringRep::EscapeMode)escape); -#if DO_CHANGELOCALE - if (!repeat || retval || (lc_ctype == setlocale(LC_CTYPE,""))) -#endif - break; - } -#if DO_CHANGELOCALE - if(!repeat) - { 
- setlocale(LC_CTYPE,(const char *)lc_ctype); - } -#endif - } - return GNativeString(retval); -} - -/*MBCS*/ -GNativeString -GBaseString::getUTF82Native( EscapeMode escape ) const -{ //MBCS cvt - GNativeString retval; - - // We don't want to convert this if it - // already is known to be native... -// if (isNative()) return *this; - - const size_t slen=length()+1; - if(slen>1) - { - retval=UTF8ToNative(false,escape) ; - if(!retval.length()) - { - retval=(const char*)*this; - } - } - return retval; -} - -GUTF8String -GBaseString::NativeToUTF8(void) const -{ - GP retval; - if(length()) - { - const char *source=(*this); -#if DO_CHANGELOCALE - GUTF8String lc_ctype=setlocale(LC_CTYPE,0); -#endif - bool repeat; - for(repeat=true;;repeat=false) - { - if( (retval=GStringRep::NativeToUTF8(source)) ) - { - if(GStringRep::cmp(retval->toNative(),source)) - { - retval=GStringRep::UTF8::create((unsigned int)0); - } - } -#if DO_CHANGELOCALE - if(!repeat || retval || (lc_ctype == setlocale(LC_CTYPE,""))) -#endif - break; - } -#if DO_CHANGELOCALE - if(!repeat) - { - setlocale(LC_CTYPE,(const char *)lc_ctype); - } -#endif - } - return GUTF8String(retval); -} - -GUTF8String -GBaseString::getNative2UTF8(void) const -{ //MBCS cvt - - // We don't want to do a transform this - // if we already are in the given type. 
-// if (isUTF8()) return *this; - - const size_t slen=length()+1; - GUTF8String retval; - if(slen > 1) - { - retval=NativeToUTF8(); - if(!retval.length()) - { - retval=(const char *)(*this); - } - } - return retval; -} /*MBCS*/ - -int -GStringRep::Native::cmp(const GP &s2,const int len) const -{ - int retval; - if(s2) - { - if(s2->isUTF8()) - { - const GP r(toUTF8(true)); - if(r) - { - retval=GStringRep::cmp(r->data,s2->data,len); - }else - { - retval=cmp(s2->toNative(NOT_ESCAPED),len); - } - }else - { - retval=GStringRep::cmp(data,s2->data,len); - } - }else - { - retval=GStringRep::cmp(data,0,len); - } - return retval; -} - -int -GStringRep::Native::toInt() const -{ - return atoi(data); -} - -long -GStringRep::Native::toLong( - const int pos, int &endpos, const int base) const -{ - char *edata=0; - const long retval=strtol(data+pos, &edata, base); - if(edata) - { - endpos=(int)((size_t)edata-(size_t)data); - }else - { - endpos=(-1); - } - return retval; -} - -unsigned long -GStringRep::Native::toULong( - const int pos, int &endpos, const int base) const -{ - char *edata=0; - const unsigned long retval=strtoul(data+pos, &edata, base); - if(edata) - { - endpos=(int)((size_t)edata-(size_t)data); - }else - { - endpos=(-1); - } - return retval; -} - -double -GStringRep::Native::toDouble( - const int pos, int &endpos) const -{ - char *edata=0; - const double retval=strtod(data+pos, &edata); - if(edata) - { - endpos=(int)((size_t)edata-(size_t)data); - }else - { - endpos=(-1); - } - return retval; -} - -unsigned long -GStringRep::Native::getValidUCS4(const char *&source) const -{ - unsigned long retval=0; - int n=(int)((size_t)size+(size_t)data-(size_t)source); - if(source && (n > 0)) - { - mbstate_t ps; - //(void)mbrlen(source, n, &ps); - memset(&ps,0,sizeof(mbstate_t)); - wchar_t wt; - const int len=mbrtowc(&wt,source,n,&ps); - if(len>=0) - { - if(sizeof(wchar_t) == sizeof(unsigned short)) - { - source+=len; - unsigned short s[2]; - s[0]=(unsigned short)wt; - 
if(UTF16toUCS4(retval,s,s+1)<=0) - { - if((n-=len)>0) - { - const int len=mbrtowc(&wt,source,n,&ps); - if(len>=0) - { - s[1]=(unsigned short)wt; - unsigned long w; - if(UTF16toUCS4(w,s,s+2)>0) - { - source+=len; - retval=w; - } - } - } - } - }else - { - retval=(unsigned long)wt; - source++; - } - }else - { - source++; - } - } - return retval; -} - -// Tests if a string is legally encoded in the current character set. -bool -GStringRep::Native::is_valid(void) const -{ - bool retval=true; - if(data && size) - { - size_t n=size; - const char *s=data; - mbstate_t ps; - //(void)mbrlen(s, n, &ps); - memset(&ps,0,sizeof(mbstate_t)); - do - { - size_t m=mbrlen(s,n,&ps); - if(m > n) - { - retval=false; - break; - }else if(m) - { - s+=m; - n-=m; - }else - { - break; - } - } while(n); - } - return retval; -} - -// These are dummy functions. -void -GStringRep::set_remainder(void const * const, const unsigned int, - const EncodeType) {} -void -GStringRep::set_remainder(void const * const, const unsigned int, - const GP &encoding) {} -void -GStringRep::set_remainder( const GP &) {} - -GP -GStringRep::get_remainder( void ) const -{ - return 0; -} - -GNativeString::GNativeString(const char dat) -{ - init(GStringRep::Native::create(&dat,0,1)); -} - -GNativeString::GNativeString(const char *str) -{ - init(GStringRep::Native::create(str)); -} - -GNativeString::GNativeString(const unsigned char *str) -{ - init(GStringRep::Native::create((const char *)str)); -} - -GNativeString::GNativeString(const unsigned short *str) -{ - init(GStringRep::Native::create(str,0,-1)); -} - -GNativeString::GNativeString(const unsigned long *str) -{ - init(GStringRep::Native::create(str,0,-1)); -} - -GNativeString::GNativeString(const char *dat, unsigned int len) -{ - init( - GStringRep::Native::create(dat,0,((int)len<0)?(-1):(int)len)); -} - -GNativeString::GNativeString(const unsigned short *dat, unsigned int len) -{ - init( - GStringRep::Native::create(dat,0,((int)len<0)?(-1):(int)len)); -} - 
-GNativeString::GNativeString(const unsigned long *dat, unsigned int len) -{ - init( - GStringRep::Native::create(dat,0,((int)len<0)?(-1):(int)len)); -} - -GNativeString::GNativeString(const GNativeString &str) -{ - init(str); -} - -GNativeString::GNativeString(const GBaseString &gs, int from, int len) -{ - init( - GStringRep::Native::create(gs,from,((int)len<0)?(-1):(int)len)); -} - -GNativeString::GNativeString(const int number) -{ - init(GStringRep::Native::create_format("%d",number)); -} - -GNativeString::GNativeString(const double number) -{ - init(GStringRep::Native::create_format("%f",number)); -} - -GNativeString& -GNativeString::operator= (const char str) -{ return init(GStringRep::Native::create(&str,0,1)); } - -GNativeString& -GNativeString::operator= (const char *str) -{ return init(GStringRep::Native::create(str)); } - -GNativeString -GBaseString::operator+(const GNativeString &s2) const -{ - return GStringRep::Native::create(*this,s2); -} - -GP -GStringRep::NativeToUTF8( const char *s ) -{ - return GStringRep::Native::create(s)->toUTF8(); -} - -#endif // HAS_WCHAR - -template -GP -GStringRep::create(const unsigned int sz, TYPE *) -{ - GP gaddr; - if (sz > 0) - { - GStringRep *addr; - gaddr=(addr=new TYPE); - addr->data=(char *)(::operator new(sz+1)); - addr->size = sz; - addr->data[sz] = 0; - } - return gaddr; -} - -GP -GStringRep::strdup(const char *s) const -{ - GP retval; - const int length=s?strlen(s):0; - if(length>0) - { - retval=blank(length); - char const * const end=s+length; - char *ptr=retval->data; - for(;*s&&(s!=end);ptr++) - { - ptr[0]=s++[0]; - } - ptr[0]=0; - } - return retval; -} - -GP -GStringRep::substr(const char *s,const int start,const int len) const -{ - GP retval; - if(s && s[0]) - { - const unsigned int length=(start<0 || len<0)?(unsigned int)strlen(s):(unsigned int)(-1); - const char *startptr, *endptr; - if(start<0) - { - startptr=s+length+start; - if(startptrstartptr) - { - retval=blank((size_t)(endptr-startptr)); - char 
*data=retval->data; - for(; (startptr -GStringRep::substr(const unsigned short *s,const int start,const int len) const -{ - GP retval; - if(s && s[0]) - { - unsigned short const *eptr; - if(len<0) - { - for(eptr=s;eptr[0];++eptr) - EMPTY_LOOP; - }else - { - eptr=&(s[len]); - } - s=&s[start]; - if((size_t)s<(size_t)eptr) - { - mbstate_t ps; - memset(&ps,0,sizeof(mbstate_t)); - unsigned char *buf,*ptr; - GPBuffer gbuf(buf,(((size_t)eptr-(size_t)s)/2)*3+7); - for(ptr=buf;s[0];) - { - unsigned long w; - int i=UTF16toUCS4(w,s,eptr); - if(i<=0) - break; - s+=i; - ptr=UCS4toString(w,ptr,&ps); - } - ptr[0]=0; - retval = strdup( (const char *)buf ); - } - } - return retval; -} - -GP -GStringRep::substr(const unsigned long *s,const int start,const int len) const -{ - GP retval; - if(s && s[0]) - { - unsigned long const *eptr; - if(len<0) - { - for(eptr=s;eptr[0];++eptr) - EMPTY_LOOP; - }else - { - eptr=&(s[len]); - } - s=&s[start]; - if((size_t)s<(size_t)eptr) - { - mbstate_t ps; - memset(&ps,0,sizeof(mbstate_t)); - unsigned char *buf,*ptr; - GPBuffer gbuf(buf,((((size_t)eptr-(size_t)s))/4)*6+7); - for(ptr=buf;s[0];++s) - { - ptr=UCS4toString(s[0],ptr,&ps); - } - ptr[0]=0; - retval = strdup( (const char *)buf ); - } - } - return retval; -} - -GP -GStringRep::append(const char *s2) const -{ - GP retval; - if(s2) - { - retval=concat(data,s2); - }else - { - retval=const_cast(this); - } - return retval; -} - -GP -GStringRep::UTF8::append(const GP &s2) const -{ - GP retval; - if(s2) - { - if(s2->isNative()) - { - G_THROW( ERR_MSG("GStringRep.appendNativeToUTF8") ); - } - retval=concat(data,s2->data); - }else - { - retval=const_cast(this); - } - return retval; -} - -GP -GStringRep::concat(const char *s1,const char *s2) const -{ - const int length1=(s1?strlen(s1):0); - const int length2=(s2?strlen(s2):0); - const int length=length1+length2; - GP retval; - if(length>0) - { - retval=blank(length); - GStringRep &r=*retval; - if(length1) - { - strcpy(r.data,s1); - if(length2) - 
strcat(r.data,s2); - }else - { - strcpy(r.data,s2); - } - } - return retval; -} - -const char *GBaseString::nullstr = ""; - -void -GBaseString::empty( void ) -{ - init(0); -} - -GP -GStringRep::getbuf(int n) const -{ - GP retval; - if(n< 0) - n=strlen(data); - if(n>0) - { - retval=blank(n); - char *ndata=retval->data; - strncpy(ndata,data,n); - ndata[n]=0; - } - return retval; -} - -const char * -GStringRep::isCharType( - bool (*xiswtest)(const unsigned long wc), const char *ptr, const bool reverse) const -{ - char const * xptr=ptr; - const unsigned long w=getValidUCS4(xptr); - if((ptr != xptr) - &&(((sizeof(wchar_t) == 2)&&(w&~0xffff)) - ||(reverse?(!xiswtest(w)):xiswtest(w)))) - { - ptr=xptr; - } - return ptr; -} - -int -GStringRep::nextCharType( - bool (*xiswtest)(const unsigned long wc), const int from, const int len, - const bool reverse) const -{ - // We want to return the position of the next - // non white space starting from the #from# - // location. isspace should work in any locale - // so we should only need to do this for the non- - // native locales (UTF8) - int retval; - if(from 0xff) - _w = (unsigned char)w; - - return - (w&~0xff)?(true):(((unsigned long)isspace(_w))||((w == '\r')||(w == '\n'))); -#endif -} - -bool -GStringRep::giswupper(const unsigned long w) -{ -#if HAS_WCTYPE - return ((sizeof(wchar_t) == 2)&&(w&~0xffff)) - ?(true):((unsigned long)iswupper((wchar_t)w)?true:false); -#else - return (w&~0xff)?(true):((unsigned long)isupper((char)w)?true:false); -#endif -} - -bool -GStringRep::giswlower(const unsigned long w) -{ -#if HAS_WCTYPE - return ((sizeof(wchar_t) == 2)&&(w&~0xffff)) - ?(true):((unsigned long)iswlower((wchar_t)w)?true:false); -#else - return (w&~0xff)?(true):((unsigned long)islower((char)w)?true:false); -#endif -} - -unsigned long -GStringRep::gtowupper(const unsigned long w) -{ -#if HAS_WCTYPE - return ((sizeof(wchar_t) == 2)&&(w&~0xffff)) - ?w:((unsigned long)towupper((wchar_t)w)); -#else - return (w&~0xff)?w:((unsigned 
long)toupper((char)w)); -#endif -} - -unsigned long -GStringRep::gtowlower(const unsigned long w) -{ -#if HAS_WCTYPE - return ((sizeof(wchar_t) == 2)&&(w&~0xffff)) - ?w:((unsigned long)towlower((wchar_t)w)); -#else - return (w&~0xff)?w:((unsigned long)tolower((char)w)); -#endif -} - -GP -GStringRep::tocase( - bool (*xiswcase)(const unsigned long wc), - unsigned long (*xtowcase)(const unsigned long wc)) const -{ - GP retval; - char const * const eptr=data+size; - char const *ptr=data; - while(ptr gbuf(buf,n+(1+size-n)*6); - if(n>0) - { - strncpy((char *)buf,data,n); - } - unsigned char *buf_ptr=buf+n; - for(char const *ptr=data+n;ptr(this); - } - return retval; -} - -// Returns a copy of this string with characters used in XML escaped as follows: -// '<' --> "<" -// '>' --> ">" -// '&' --> "&" -// '\'' --> "'" -// '\"' --> """ -// Also escapes characters 0x00 through 0x1f and 0x7e through 0x7f. -GP -GStringRep::toEscaped( const bool tosevenbit ) const -{ - bool modified=false; - char *ret; - GPBuffer gret(ret,size*7); - ret[0]=0; - char *retptr=ret; - char const *start=data; - char const *s=start; - char const *last=s; - GP special; - for(unsigned long w;(w=getValidUCS4(s));last=s) - { - char const *ss=0; - switch(w) - { - case '<': - ss="<"; - break; - case '>': - ss=">"; - break; - case '&': - ss="&"; - break; - case '\47': - ss="'"; - break; - case '\42': - ss="""; - break; - default: - if((w<' ')||(w>=0x7e && (tosevenbit || (w < 0x80)))) - { - special=toThis(UTF8::create_format("&#%lu;",w)); - ss=special->data; - } - break; - } - if(ss) - { - modified=true; - if(s!=start) - { - size_t len=(size_t)last-(size_t)start; - strncpy(retptr,start,len); - retptr+=len; - start=s; - } - if(ss[0]) - { - size_t len=strlen(ss); - strcpy(retptr,ss); - retptr+=len; - } - } - } - GP retval; - if(modified) - { - strcpy(retptr,start); - retval=strdup( ret ); - }else - { - retval=const_cast(this); - } -// DEBUG_MSG( "Escaped string is '" << ret << "'\n" ); - return retval; -} - - 
-static const GMap & -BasicMap( void ) -{ - static GMap Basic; - if (! Basic.size()) - { - Basic["lt"] = GUTF8String('<'); - Basic["gt"] = GUTF8String('>'); - Basic["amp"] = GUTF8String('&'); - Basic["apos"] = GUTF8String('\47'); - Basic["quot"] = GUTF8String('\42'); - } - return Basic; -} - -GUTF8String -GUTF8String::fromEscaped( const GMap ConvMap ) const -{ - GUTF8String ret; // Build output string here - int start_locn = 0; // Beginning of substring to skip - int amp_locn; // Location of a found ampersand - - while( (amp_locn = search( '&', start_locn )) > -1 ) - { - // Found the next apostrophe - // Locate the closing semicolon - const int semi_locn = search( ';', amp_locn ); - // No closing semicolon, exit and copy - // the rest of the string. - if( semi_locn < 0 ) - break; - ret += substr( start_locn, amp_locn - start_locn ); - int const len = semi_locn - amp_locn - 1; - if(len) - { - GUTF8String key = substr( amp_locn+1, len); - //DEBUG_MSG( "key = '" << key << "'\n" ); - char const * s=key; - if( s[0] == '#') - { - unsigned long value; - char *ptr=0; - if(s[1] == 'x' || s[1] == 'X') - { - value=strtoul((char const *)(s+2),&ptr,16); - }else - { - value=strtoul((char const *)(s+1),&ptr,10); - } - if(ptr) - { - unsigned char utf8char[7]; - unsigned char const * const end=GStringRep::UCS4toUTF8(value,utf8char); - ret+=GUTF8String((char const *)utf8char,(size_t)end-(size_t)utf8char); - }else - { - ret += substr( amp_locn, semi_locn - amp_locn + 1 ); - } - }else - { - GPosition map_entry = ConvMap.contains( key ); - if( map_entry ) - { // Found in the conversion map, substitute - ret += ConvMap[map_entry]; - } else - { - static const GMap &Basic = BasicMap(); - GPosition map_entry = Basic.contains( key ); - if ( map_entry ) - { - ret += Basic[map_entry]; - }else - { - ret += substr( amp_locn, len+2 ); - } - } - } - }else - { - ret += substr( amp_locn, len+2 ); - } - start_locn = semi_locn + 1; -// DEBUG_MSG( "ret = '" << ret << "'\n" ); - } - - // Copy the end 
of the string to the output - ret += substr( start_locn, length()-start_locn ); - -// DEBUG_MSG( "Unescaped string is '" << ret << "'\n" ); - return (ret == *this)?(*this):ret; -} - -GUTF8String -GUTF8String::fromEscaped(void) const -{ - const GMap nill; - return fromEscaped(nill); -} - -GP -GStringRep::setat(int n, char ch) const -{ - GP retval; - if(n<0) - n+=size; - if (n < 0 || n>size) - GBaseString::throw_illegal_subscript(); - if(ch == data[n]) - { - retval=const_cast(this); - }else if(!ch) - { - retval=getbuf(n); - }else - { - retval=getbuf((ndata[n]=ch; - if(n == size) - retval->data[n+1]=0; - } - return retval; -} - -#ifndef WASM_MODE -#if defined(AUTOCONF) && defined(HAVE_VSNPRINTF) -# define USE_VSNPRINTF vsnprintf -#elif defined(WIN32) && !defined(__CYGWIN32__) -# define USE_VSNPRINTF _vsnprintf -#elif defined(linux) -# define USE_VSNPRINTF vsnprintf -#endif -#endif - -GUTF8String & -GUTF8String::format(const char fmt[], ... ) -{ - va_list args; - va_start(args, fmt); - return init(GStringRep::UTF8::create(fmt,args)); -} - -GP -GStringRep::UTF8::create_format(const char fmt[],...) -{ - va_list args; - va_start(args, fmt); - return create(fmt,args); -} - -GP -GStringRep::vformat(va_list args) const -{ - GP retval; - if(size) - { - char const * const fmt=data; - int buflen=32768; - char *buffer; - GPBuffer gbuffer(buffer,buflen); - ChangeLocale locale(LC_NUMERIC,(isNative()?0:"C")); - // Format string -#ifdef USE_VSNPRINTF - while(USE_VSNPRINTF(buffer, buflen, fmt, args)<0) - { - gbuffer.resize(0); - gbuffer.resize(buflen+32768); - } - va_end(args); -#else - buffer[buflen-1] = 0; - vsprintf(buffer, fmt, args); - va_end(args); - if (buffer[buflen-1]) - { - // This isn't as fatal since it is on the stack, but we - // definitely should stop the current operation. 
- G_THROW( ERR_MSG("GString.overwrite") ); - } -#endif - retval=strdup((const char *)buffer); - } - // Go altering the string - return retval; -} - -int -GStringRep::search(char c, int from) const -{ - if (from<0) - from += size; - int retval=(-1); - if (from>=0 && from=0 && from=0) && (from= 0;++loc) - retval=loc; - return retval; -} - -int -GStringRep::contains(const char accept[],int from) const -{ - if(from<0) - { - from+=size; - if(from<0) - G_THROW( ERR_MSG("GString.bad_subscript") ); - } - int retval=(-1); - if (accept && accept[0] && from>=0 && from= 0) - { - retval=from++; - } - return retval; -} - -bool -GBaseString::is_int(void) const -{ - bool isLong=!!ptr; - if(isLong) - { - int endpos; - (*this)->toLong(0,endpos); - if(endpos>=0) - { - isLong=((*this)->nextNonSpace(endpos) == (int)length()); - } - } - return isLong; -} - -bool -GBaseString::is_float(void) const -{ - bool isDouble=!!ptr; - if(isDouble) - { - int endpos; - (*this)->toDouble(0,endpos); - if(endpos>=0) - { - isDouble=((*this)->nextNonSpace(endpos) == (int)length()); - } - } - return isDouble; -} - -unsigned int -hash(const GBaseString &str) -{ - unsigned int x = 0; - const char *s = (const char*)str; - while (*s) - x = x ^ (x<<6) ^ (unsigned char)(*s++); - return x; -} - -void -GBaseString::throw_illegal_subscript() -{ - G_THROW( ERR_MSG("GString.bad_subscript") ); -} - -unsigned char * -GStringRep::UTF8::UCS4toString( - const unsigned long w0,unsigned char *ptr, mbstate_t *) const -{ - return UCS4toUTF8(w0,ptr); -} - -int -GStringRep::UTF8::ncopy( - wchar_t * const buf, const int buflen ) const -{ - int retval=(-1); - if(buf && buflen) - { - buf[0]=0; - if(data[0]) - { - const size_t length=strlen(data); - const unsigned char * const eptr=(const unsigned char *)(data+length); - wchar_t *r=buf; - wchar_t const * const rend=buf+buflen; - for(const unsigned char *s=(const unsigned char *)data;(r -GStringRep::UTF8::toNative(const EscapeMode escape) const -{ - GP retval; - if(data[0]) - { - 
const size_t length=strlen(data); - const unsigned char * const eptr=(const unsigned char *)(data+length); - unsigned char *buf; - GPBuffer gbuf(buf,12*length+12); - unsigned char *r=buf; - mbstate_t ps; - memset(&ps,0,sizeof(mbstate_t)); - for(const unsigned char *s=(const unsigned char *)data;(s -GStringRep::UTF8::toUTF8(const bool nothrow) const -{ - if(!nothrow) - G_THROW( ERR_MSG("GStringRep.UTF8ToUTF8") ); - return const_cast(this); -} - -// Tests if a string is legally encoded in the current character set. -bool -GStringRep::UTF8::is_valid(void) const -{ - bool retval=true; - if(data && size) - { - const unsigned char * const eptr=(const unsigned char *)(data+size); - for(const unsigned char *s=(const unsigned char *)data;(s>6)|0xC0); - *ptr++ = (unsigned char)((w|0x80)&0xBF); - } - else if(w <= 0xFFFF) - { - *ptr++ = (unsigned char)((w>>12)|0xE0); - *ptr++ = (unsigned char)(((w>>6)|0x80)&0xBF); - *ptr++ = (unsigned char)((w|0x80)&0xBF); - } - else if(w <= 0x1FFFFF) - { - *ptr++ = (unsigned char)((w>>18)|0xF0); - *ptr++ = (unsigned char)(((w>>12)|0x80)&0xBF); - *ptr++ = (unsigned char)(((w>>6)|0x80)&0xBF); - *ptr++ = (unsigned char)((w|0x80)&0xBF); - } - else if(w <= 0x3FFFFFF) - { - *ptr++ = (unsigned char)((w>>24)|0xF8); - *ptr++ = (unsigned char)(((w>>18)|0x80)&0xBF); - *ptr++ = (unsigned char)(((w>>12)|0x80)&0xBF); - *ptr++ = (unsigned char)(((w>>6)|0x80)&0xBF); - *ptr++ = (unsigned char)((w|0x80)&0xBF); - } - else if(w <= 0x7FFFFFFF) - { - *ptr++ = (unsigned char)((w>>30)|0xFC); - *ptr++ = (unsigned char)(((w>>24)|0x80)&0xBF); - *ptr++ = (unsigned char)(((w>>18)|0x80)&0xBF); - *ptr++ = (unsigned char)(((w>>12)|0x80)&0xBF); - *ptr++ = (unsigned char)(((w>>6)|0x80)&0xBF); - *ptr++ = (unsigned char)((w|0x80)&0xBF); - } - else - { - *ptr++ = '?'; - } - return ptr; -} - - // Creates with a concat operation. 
-GP -GStringRep::concat( const char *s1, const GP &s2) const -{ - GP retval; - if(s2) - { - retval=toThis(s2); - if(s1 && s1[0]) - { - if(retval) - { - retval=concat(s1,retval->data); - }else - { - retval=strdup(s1); - } - } - }else if(s1 && s1[0]) - { - retval=strdup(s1); - } - return retval; -} - - // Creates with a concat operation. - -GP -GStringRep::concat( const GP &s1,const char *s2) const -{ - GP retval; - if(s1) - { - retval=toThis(s1); - if(s2 && s2[0]) - { - if(retval) - { - retval=retval->append(s2); - }else - { - retval=strdup(s2); - } - } - }else if(s2 && s2[0]) - { - retval=strdup(s2); - } - return retval; -} - -GP -GStringRep::concat(const GP &s1,const GP &s2) const -{ - GP retval; - if(s1) - { - retval=toThis(s1,s2); - if(retval && s2) - { - retval=retval->append(toThis(s2)); - } - }else if(s2) - { - retval=toThis(s2); - } - return retval; -} - -//< Changed for WinDjView project -//#ifdef WIN32 -#if 0 -//> -static const char *setlocale_win32(void) -{ - static const char *locale=setlocale(LC_ALL,0); - if(! 
locale || (locale[0] == 'C' && !locale[1])) - { - locale=setlocale(LC_ALL,""); - } - return locale; -} -const char *setlocale_win32_var = setlocale_win32(); -#endif - -GStringRep::GStringRep(void) -{ - size=0; - data=0; -} - -GStringRep::~GStringRep() -{ - if(data) - { - data[0]=0; - ::operator delete(data); - } - data=0; -} - -GStringRep::UTF8::UTF8(void) {} - -GStringRep::UTF8::~UTF8() {} - -int -GStringRep::cmp(const char *s1,const int len) const -{ - return cmp(data,s1,len); -} - -int -GStringRep::cmp(const char *s1, const char *s2,const int len) -{ - return (len - ?((s1&&s1[0]) - ?((s2&&s2[0]) - ?((len>0) - ?strncmp(s1,s2,len) - :strcmp(s1,s2)) - :1) - :((s2&&s2[0])?(-1):0)) - :0); -} - -int -GStringRep::cmp(const GP &s1, const GP &s2, - const int len ) -{ - return (s1?(s1->cmp(s2,len)):cmp(0,(s2?(s2->data):0),len)); -} - -int -GStringRep::cmp(const GP &s1, const char *s2, - const int len ) -{ - return cmp((s1?s1->data:0),s2,len); -} - -int -GStringRep::cmp(const char *s1, const GP &s2, - const int len ) -{ - return cmp(s1,(s2?(s2->data):0),len); -} - -int -GStringRep::UTF8::cmp(const GP &s2,const int len) const -{ - int retval; - if(s2) - { - if(s2->isNative()) - { - GP r(s2->toUTF8(true)); - if(r) - { - retval=GStringRep::cmp(data,r->data,len); - }else - { - retval=-(s2->cmp(toNative(NOT_ESCAPED),len)); - } - }else - { - retval=GStringRep::cmp(data,s2->data,len); - } - }else - { - retval=GStringRep::cmp(data,0,len); - } - return retval; -} - -int -GStringRep::UTF8::toInt() const -{ - int endpos; - return (int)toLong(0,endpos); -} - -static inline long -Cstrtol(char *data,char **edata, const int base) -{ - GStringRep::ChangeLocale locale(LC_NUMERIC,"C"); - while (data && *data==' ') data++; - return strtol(data,edata,base); -} - -long -GStringRep::UTF8::toLong( - const int pos, int &endpos, const int base) const -{ - char *edata=0; - long retval=Cstrtol(data+pos,&edata, base); - if(edata) - { - endpos=edata-data; - }else - { - endpos=(-1); - GP 
ptr=ptr->strdup(data+pos); - if(ptr) - ptr=ptr->toNative(NOT_ESCAPED); - if(ptr) - { - int xendpos; - retval=ptr->toLong(0,xendpos,base); - if(xendpos> 0) - { - endpos=(int)size; - ptr=ptr->strdup(data+xendpos); - if(ptr) - { - ptr=ptr->toUTF8(true); - if(ptr) - { - endpos-=(int)(ptr->size); - } - } - } - } - } - return retval; -} - -static inline unsigned long -Cstrtoul(char *data,char **edata, const int base) -{ - GStringRep::ChangeLocale locale(LC_NUMERIC,"C"); - while (data && *data==' ') data++; - return strtoul(data,edata,base); -} - -unsigned long -GStringRep::UTF8::toULong( - const int pos, int &endpos, const int base) const -{ - char *edata=0; - unsigned long retval=Cstrtoul(data+pos,&edata, base); - if(edata) - { - endpos=edata-data; - }else - { - endpos=(-1); - GP ptr=ptr->strdup(data+pos); - if(ptr) - ptr=ptr->toNative(NOT_ESCAPED); - if(ptr) - { - int xendpos; - retval=ptr->toULong(0,xendpos,base); - if(xendpos> 0) - { - endpos=(int)size; - ptr=ptr->strdup(data+xendpos); - if(ptr) - { - ptr=ptr->toUTF8(true); - if(ptr) - { - endpos-=(int)(ptr->size); - } - } - } - } - } - return retval; -} - -static inline double -Cstrtod(char *data,char **edata) -{ - GStringRep::ChangeLocale locale(LC_NUMERIC,"C"); - while (data && *data==' ') data++; - return strtod(data,edata); -} - -double -GStringRep::UTF8::toDouble(const int pos, int &endpos) const -{ - char *edata=0; - double retval=Cstrtod(data+pos,&edata); - if(edata) - { - endpos=edata-data; - }else - { - endpos=(-1); - GP ptr=ptr->strdup(data+pos); - if(ptr) - ptr=ptr->toNative(NOT_ESCAPED); - if(ptr) - { - int xendpos; - retval=ptr->toDouble(0,xendpos); - if(xendpos >= 0) - { - endpos=(int)size; - ptr=ptr->strdup(data+xendpos); - if(ptr) - { - ptr=ptr->toUTF8(true); - if(ptr) - { - endpos-=(int)(ptr->size); - } - } - } - } - } - return retval; -} - -int -GStringRep::getUCS4(unsigned long &w, const int from) const -{ - int retval; - if(from>=size) - { - w=0; - retval=size; - }else if(from<0) - { - 
w=(unsigned int)(-1); - retval=(-1); - }else - { - const char *source=data+from; - w=getValidUCS4(source); - retval=(int)((size_t)source-(size_t)data); - } - return retval; -} - - -unsigned long -GStringRep::UTF8::getValidUCS4(const char *&source) const -{ - return GStringRep::UTF8toUCS4((const unsigned char *&)source,data+size); -} - -int -GStringRep::nextNonSpace(const int from,const int len) const -{ - return nextCharType(giswspace,from,len,true); -} - -int -GStringRep::nextSpace(const int from,const int len) const -{ - return nextCharType(giswspace,from,len,false); -} - -int -GStringRep::nextChar(const int from) const -{ - char const * xptr=data+from; - (void)getValidUCS4(xptr); - return (int)((size_t)xptr-(size_t)data); -} - -int -GStringRep::firstEndSpace(int from,const int len) const -{ - const int xsize=(len<0)?size:(from+len); - const int ysize=(size>10)&0x3ff)+0xD800); - w2=(unsigned short)((w&0x3ff)+0xDC00); - retval=2; - } - return retval; -} - -int -GStringRep::UTF16toUCS4( - unsigned long &U,unsigned short const * const s,void const * const eptr) -{ - int retval=0; - U=0; - unsigned short const * const r=s+1; - if(r <= eptr) - { - unsigned long const W1=s[0]; - if((W1<0xD800)||(W1>0xDFFF)) - { - if((U=W1)) - { - retval=1; - } - }else if(W1<=0xDBFF) - { - unsigned short const * const rr=r+1; - if(rr <= eptr) - { - unsigned long const W2=s[1]; - if(((W2>=0xDC00)||(W2<=0xDFFF))&&((U=(0x10000+((W1&0x3ff)<<10))|(W2&0x3ff)))) - { - retval=2; - }else - { - retval=(-1); - } - } - } - } - return retval; -} - - -//bcr - -GUTF8String& -GUTF8String::operator+= (char ch) -{ - return init( - GStringRep::UTF8::create((const char*)*this, - GStringRep::UTF8::create(&ch,0,1))); -} - -GUTF8String& -GUTF8String::operator+= (const char *str) -{ - return init(GStringRep::UTF8::create(*this,str)); -} - -GUTF8String& -GUTF8String::operator+= (const GBaseString &str) -{ - return init(GStringRep::UTF8::create(*this,str)); -} - -GUTF8String -GUTF8String::substr(int from, int 
len) const -{ return GUTF8String(*this, from, len); } - -GUTF8String -GUTF8String::operator+(const GBaseString &s2) const -{ return GStringRep::UTF8::create(*this,s2); } - -GUTF8String -GUTF8String::operator+(const GUTF8String &s2) const -{ return GStringRep::UTF8::create(*this,s2); } - -GUTF8String -GUTF8String::operator+(const char *s2) const -{ return GStringRep::UTF8::create(*this,s2); } - -char * -GUTF8String::getbuf(int n) -{ - if(ptr) - init((*this)->getbuf(n)); - else if(n>0) - init(GStringRep::UTF8::create(n)); - else - init(0); - return ptr?((*this)->data):0; -} - -void -GUTF8String::setat(const int n, const char ch) -{ - if((!n)&&(!ptr)) - { - init(GStringRep::UTF8::create(&ch,0,1)); - }else - { - init((*this)->setat(CheckSubscript(n),ch)); - } -} - -GP -GStringRep::UTF8ToNative( const char *s, const EscapeMode escape ) -{ - return GStringRep::UTF8::create(s)->toNative(escape); -} - -GUTF8String::GUTF8String(const char dat) -{ init(GStringRep::UTF8::create(&dat,0,1)); } - -GUTF8String::GUTF8String(const GUTF8String &fmt, va_list &args) -{ - if (fmt.ptr) - init(fmt->vformat(args)); - else - init(fmt); -} - -GUTF8String::GUTF8String(const char *str) -{ init(GStringRep::UTF8::create(str)); } - -GUTF8String::GUTF8String(const unsigned char *str) -{ init(GStringRep::UTF8::create((const char *)str)); } - -GUTF8String::GUTF8String(const unsigned short *str) -{ init(GStringRep::UTF8::create(str,0,-1)); } - -GUTF8String::GUTF8String(const unsigned long *str) -{ init(GStringRep::UTF8::create(str,0,-1)); } - -GUTF8String::GUTF8String(const char *dat, unsigned int len) -{ init(GStringRep::UTF8::create(dat,0,((int)len<0)?(-1):(int)len)); } - -GUTF8String::GUTF8String(const unsigned short *dat, unsigned int len) -{ init(GStringRep::UTF8::create(dat,0,((int)len<0)?(-1):(int)len)); } - -GUTF8String::GUTF8String(const unsigned long *dat, unsigned int len) -{ init(GStringRep::UTF8::create(dat,0,((int)len<0)?(-1):(int)len)); } - -GUTF8String::GUTF8String(const GBaseString 
&gs, int from, int len) -{ init(GStringRep::UTF8::create(gs,from,((int)len<0)?(-1):(int)len)); } - -GUTF8String::GUTF8String(const int number) -{ init(GStringRep::UTF8::create_format("%d",number)); } - -GUTF8String::GUTF8String(const double number) -{ init(GStringRep::UTF8::create_format("%f",number)); } - -GUTF8String& GUTF8String::operator= (const char str) -{ return init(GStringRep::UTF8::create(&str,0,1)); } - -GUTF8String& GUTF8String::operator= (const char *str) -{ return init(GStringRep::UTF8::create(str)); } - -GUTF8String GBaseString::operator+(const GUTF8String &s2) const -{ return GStringRep::UTF8::create(*this,s2); } - -#if HAS_WCHAR -GUTF8String -GNativeString::operator+(const GUTF8String &s2) const -{ - if (ptr) - return GStringRep::UTF8::create((*this)->toUTF8(true),s2); - else - return GStringRep::UTF8::create((*this),s2); -} -#endif - -GUTF8String -GUTF8String::operator+(const GNativeString &s2) const -{ - GP g = s2; - if (s2.ptr) - g = s2->toUTF8(true); - return GStringRep::UTF8::create(*this,g); -} - -GUTF8String -operator+(const char *s1, const GUTF8String &s2) -{ return GStringRep::UTF8::create(s1,s2); } - -#if HAS_WCHAR -GNativeString -operator+(const char *s1, const GNativeString &s2) -{ return GStringRep::Native::create(s1,s2); } - -GNativeString& -GNativeString::operator+= (char ch) -{ - char s[2]; s[0]=ch; s[1]=0; - return init(GStringRep::Native::create((const char*)*this, s)); -} - -GNativeString& -GNativeString::operator+= (const char *str) -{ - return init(GStringRep::Native::create(*this,str)); -} - -GNativeString& -GNativeString::operator+= (const GBaseString &str) -{ - return init(GStringRep::Native::create(*this,str)); -} - -GNativeString -GNativeString::operator+(const GBaseString &s2) const -{ return GStringRep::Native::create(*this,s2); } - -GNativeString -GNativeString::operator+(const GNativeString &s2) const -{ return GStringRep::Native::create(*this,s2); } - -GNativeString -GNativeString::operator+(const char *s2) const -{ 
return GStringRep::Native::create(*this,s2); } - -char * -GNativeString::getbuf(int n) -{ - if(ptr) - init((*this)->getbuf(n)); - else if(n>0) - init(GStringRep::Native::create(n)); - else - init(0); - return ptr?((*this)->data):0; -} - -void -GNativeString::setat(const int n, const char ch) -{ - if((!n)&&(!ptr)) - { - init(GStringRep::Native::create(&ch,0,1)); - }else - { - init((*this)->setat(CheckSubscript(n),ch)); - } -} - -#endif - - -#ifdef HAVE_NAMESPACES -} -# ifndef NOT_USING_DJVU_NAMESPACE -using namespace DJVU; -# endif -#endif +//C- -*- C++ -*- +//C- ------------------------------------------------------------------- +//C- DjVuLibre-3.5 +//C- Copyright (c) 2002 Leon Bottou and Yann Le Cun. +//C- Copyright (c) 2001 AT&T +//C- +//C- This software is subject to, and may be distributed under, the +//C- GNU General Public License, either Version 2 of the license, +//C- or (at your option) any later version. The license should have +//C- accompanied the software or you may obtain a copy of the license +//C- from the Free Software Foundation at http://www.fsf.org . +//C- +//C- This program is distributed in the hope that it will be useful, +//C- but WITHOUT ANY WARRANTY; without even the implied warranty of +//C- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +//C- GNU General Public License for more details. +//C- +//C- DjVuLibre-3.5 is derived from the DjVu(r) Reference Library from +//C- Lizardtech Software. Lizardtech Software has authorized us to +//C- replace the original DjVu(r) Reference Library notice by the following +//C- text (see doc/lizard2002.djvu and doc/lizardtech2007.djvu): +//C- +//C- ------------------------------------------------------------------ +//C- | DjVu (r) Reference Library (v. 3.5) +//C- | Copyright (c) 1999-2001 LizardTech, Inc. All Rights Reserved. +//C- | The DjVu Reference Library is protected by U.S. Pat. No. +//C- | 6,058,214 and patents pending. 
+//C- | +//C- | This software is subject to, and may be distributed under, the +//C- | GNU General Public License, either Version 2 of the license, +//C- | or (at your option) any later version. The license should have +//C- | accompanied the software or you may obtain a copy of the license +//C- | from the Free Software Foundation at http://www.fsf.org . +//C- | +//C- | The computer code originally released by LizardTech under this +//C- | license and unmodified by other parties is deemed "the LIZARDTECH +//C- | ORIGINAL CODE." Subject to any third party intellectual property +//C- | claims, LizardTech grants recipient a worldwide, royalty-free, +//C- | non-exclusive license to make, use, sell, or otherwise dispose of +//C- | the LIZARDTECH ORIGINAL CODE or of programs derived from the +//C- | LIZARDTECH ORIGINAL CODE in compliance with the terms of the GNU +//C- | General Public License. This grant only confers the right to +//C- | infringe patent claims underlying the LIZARDTECH ORIGINAL CODE to +//C- | the extent such infringement is reasonably necessary to enable +//C- | recipient to make, have made, practice, sell, or otherwise dispose +//C- | of the LIZARDTECH ORIGINAL CODE (or portions thereof) and not to +//C- | any greater extent that may be necessary to utilize further +//C- | modifications or combinations. +//C- | +//C- | The LIZARDTECH ORIGINAL CODE is provided "AS IS" WITHOUT WARRANTY +//C- | OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +//C- | TO ANY WARRANTY OF NON-INFRINGEMENT, OR ANY IMPLIED WARRANTY OF +//C- | MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. +//C- +------------------------------------------------------------------ +// +// $Id: GString.cpp,v 1.26 2008/03/10 20:42:33 leonb Exp $ +// $Name: $ + +// From: Leon Bottou, 1/31/2002 +// This file has very little to do with my initial implementation. +// It has been practically rewritten by Lizardtech for i18n changes. 
+// My original implementation was very small in comparison +// . +// In my opinion, the duplication of the string classes is a failed +// attempt to use the type system to enforce coding policies. +// This could be fixed. But there are better things to do in djvulibre. + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif +#if NEED_GNUG_PRAGMAS +# pragma implementation +#endif + +#include "DjVuGString.h" +#include "GThreads.h" +#include "debug.h" + +#include +#include +#include +#if HAS_WCHAR +# include +# if !defined(AUTOCONF) || HAVE_WCHAR_H +# include +# endif +# if HAS_WCTYPE +# include +# endif +#endif +#include + +#ifndef DO_CHANGELOCALE +#define DO_CHANGELOCALE 1 +#ifdef UNIX +#if THREADMODEL != COTHREADS +#if THREADMODEL != NOTHREADS +#undef DO_CHANGELOCALE +#define DO_CHANGELOCALE 0 +#endif +#endif +#endif +#endif + + +#ifdef HAVE_NAMESPACES +namespace DJVU { +# ifdef NOT_DEFINED // Just to fool emacs c++ mode +} +#endif +#endif + + +GBaseString::~GBaseString() {} +GNativeString::~GNativeString() {} +GUTF8String::~GUTF8String() {} + +#if !HAS_MBSTATE && HAS_WCHAR +// Under some systems, wctomb() and mbtowc() are not thread +// safe. In those cases, wcrtomb and mbrtowc are preferred. +// For Solaris, wctomb() and mbtowc() are thread safe, and +// wcrtomb() and mbrtowc() don't exist. 
+ +#define wcrtomb MYwcrtomb +#define mbrtowc MYmbrtowc +#define mbrlen MYmbrlen + +static inline int +wcrtomb(char *bytes,wchar_t w,mbstate_t *) +{ + return wctomb(bytes,w); +} + +static inline int +mbrtowc(wchar_t *w,const char *source, size_t n, mbstate_t *) +{ + return mbtowc(w,source,n); +} + +static inline size_t +mbrlen(const char *s, size_t n, mbstate_t *) +{ + return mblen(s,n); +} +#endif // !HAS_MBSTATE || HAS_WCHAR + + +GP +GStringRep::upcase(void) const +{ return tocase(giswupper,gtowupper); } + +GP +GStringRep::downcase(void) const +{ return tocase(giswlower,gtowlower); } + +GP +GStringRep::UTF8::create(const unsigned int sz) +{ + return GStringRep::create(sz,(GStringRep::UTF8 *)0); +} + +GP +GStringRep::UTF8::create(const char *s) +{ + GStringRep::UTF8 dummy; + return dummy.strdup(s); +} + +GP +GStringRep::UTF8::create(const GP &s1,const GP &s2) +{ + GStringRep::UTF8 dummy; + return dummy.concat(s1,s2); +} + +GP +GStringRep::UTF8::create( const GP &s1,const char *s2) +{ + GStringRep::UTF8 dummy; + return dummy.concat(s1,s2); +} + +GP +GStringRep::UTF8::create( const char *s1, const GP &s2) +{ + GStringRep::UTF8 dummy; + return dummy.concat(s1,s2); +} + +GP +GStringRep::UTF8::create( const char *s1,const char *s2) +{ + GStringRep::UTF8 dummy; + return dummy.concat(s1,s2); +} + +GP +GStringRep::UTF8::create(const char *s,const int start,const int length) +{ + GStringRep::UTF8 dummy; + return dummy.substr(s,start,length); +} + +GP +GStringRep::UTF8::create( + const unsigned short *s,const int start,const int length) +{ + GStringRep::UTF8 dummy; + return dummy.substr(s,start,length); +} + +GP +GStringRep::UTF8::create( + const unsigned long *s,const int start,const int length) +{ + GStringRep::UTF8 dummy; + return dummy.substr(s,start,length); +} + +GP +GStringRep::UTF8::blank(const unsigned int sz) const +{ + return GStringRep::create(sz,(GStringRep::UTF8 *)0); +} + +bool +GStringRep::UTF8::isUTF8(void) const +{ + return true; +} + +GP 
+GStringRep::UTF8::toThis( + const GP &rep,const GP &) const +{ + return rep?(rep->toUTF8(true)):rep; +} + +GP +GStringRep::UTF8::create(const char fmt[],va_list& args) +{ + const GP s(create(fmt)); + return (s?(s->vformat(args)):s); +} + +#if !HAS_WCHAR + +#define NATIVE_CREATE(x) UTF8::create( x ); + +#ifdef LC_ALL +#undef LC_ALL +#endif +#define LC_ALL 0 + +class GStringRep::ChangeLocale +{ +public: + ChangeLocale(const int,const char *) {} + ~ChangeLocale() {}; +}; + +GP +GStringRep::NativeToUTF8( const char *s ) +{ + return GStringRep::UTF8::create(s); +} + +#else + +#define NATIVE_CREATE(x) Native::create( x ); + +// The declaration and implementation of GStringRep::ChangeLocale +// Not used in WinCE + +class GStringRep::ChangeLocale +{ +public: + ChangeLocale(const int category,const char locale[]); + ~ChangeLocale(); +private: + GUTF8String locale; + int category; +}; + +class GStringRep::Native : public GStringRep +{ +public: + // default constructor + Native(void); + // virtual destructor + virtual ~Native(); + + // Other virtual methods. + // Create an empty string. + virtual GP blank(const unsigned int sz = 0) const; + // Append a string. + virtual GP append(const GP &s2) const; + // Test if Native. + virtual bool isNative(void) const; + // Convert to Native. + virtual GP toNative( + const EscapeMode escape=UNKNOWN_ESCAPED) const; + // Convert to UTF8. + virtual GP toUTF8(const bool nothrow=false) const; + // Convert to UTF8. + virtual GP toThis( + const GP &rep,const GP &) const; + // Compare with #s2#. + virtual int cmp(const GP &s2, const int len=(-1)) const; + + // Convert strings to numbers. 
+ virtual int toInt(void) const; + virtual long toLong( + const int pos, int &endpos, const int base=10) const; + virtual unsigned long toULong( + const int pos, int &endpos, const int base=10) const; + virtual double toDouble( + const int pos, int &endpos) const; + + // Create an empty string + static GP create(const unsigned int sz = 0); + + // Create a strdup string. + static GP create(const char *s); + + // Creates by appending to the current string + + // Creates with a concat operation. + static GP create( + const GP &s1,const GP &s2); + static GP create( const GP &s1,const char *s2); + static GP create( const char *s1, const GP &s2); + static GP create(const char *s1,const char *s2); + + // Create with a strdup and substr operation. + static GP create( + const char *s,const int start,const int length=(-1)); + static GP create( + const unsigned short *s,const int start,const int length=(-1)); + static GP create( + const unsigned long *s,const int start,const int length=(-1)); + + // Create with an sprintf() + static GP create_format(const char fmt[],...); + static GP create(const char fmt[],va_list &args); + + virtual unsigned char *UCS4toString( + const unsigned long w,unsigned char *ptr, mbstate_t *ps=0) const; + + // Tests if a string is legally encoded in the current character set. + virtual bool is_valid(void) const; + + virtual int ncopy(wchar_t * const buf, const int buflen) const; + + friend class GBaseString; +protected: + // Return the next character and increment the source pointer. + virtual unsigned long getValidUCS4(const char *&source) const; +}; + +GP +GStringRep::Native::create(const unsigned int sz) +{ + return GStringRep::create(sz,(GStringRep::Native *)0); +} + + // Create a strdup string. 
+GP +GStringRep::Native::create(const char *s) +{ + GStringRep::Native dummy; + return dummy.strdup(s); +} + +GP +GStringRep::Native::create(const GP &s1,const GP &s2) +{ + GStringRep::Native dummy; + return dummy.concat(s1,s2); +} + +GP +GStringRep::Native::create( const GP &s1,const char *s2) +{ + GStringRep::Native dummy; + return dummy.concat(s1,s2); +} + +GP +GStringRep::Native::create( const char *s1, const GP &s2) +{ + GStringRep::Native dummy; + return dummy.concat(s1,s2); +} + +GP +GStringRep::Native::create(const char *s1,const char *s2) +{ + GStringRep::Native dummy; + return dummy.concat(s1,s2); +} + +GP +GStringRep::Native::create( + const char *s,const int start,const int length) +{ + GStringRep::Native dummy; + return dummy.substr(s,start,length); +} + +GP +GStringRep::Native::create( + const unsigned short *s,const int start,const int length) +{ + GStringRep::Native dummy; + return dummy.substr(s,start,length); +} + +GP +GStringRep::Native::create( + const unsigned long *s,const int start,const int length) +{ + GStringRep::Native dummy; + return dummy.substr(s,start,length); +} + +GP +GStringRep::Native::blank(const unsigned int sz) const +{ + return GStringRep::create(sz,(GStringRep::Native *)0); +} + +bool +GStringRep::Native::isNative(void) const +{ + return true; +} + +GP +GStringRep::Native::toThis( + const GP &rep,const GP &) const +{ + return rep?(rep->toNative(NOT_ESCAPED)):rep; +} + +GP +GStringRep::Native::create(const char fmt[],va_list &args) +{ + const GP s(create(fmt)); + return (s?(s->vformat(args)):s); +} + +int +GStringRep::Native::ncopy( + wchar_t * const buf, const int buflen ) const +{ + return toUTF8()->ncopy(buf,buflen); +} + +GStringRep::ChangeLocale::ChangeLocale(const int xcategory, const char xlocale[] ) + : category(xcategory) +{ +#if DO_CHANGELOCALE + // This is disabled under UNIX because + // it does not play nice with MT. 
+ if(xlocale) + { + locale=setlocale(xcategory,0); + if(locale.length() &&(locale!=xlocale)) + { + if(locale == setlocale(category,xlocale)) + { + locale.empty(); + } + } + else + { + locale.empty(); + } + } +#endif +} + +GStringRep::ChangeLocale::~ChangeLocale() +{ +#if DO_CHANGELOCALE + if(locale.length()) + { + setlocale(category,(const char *)locale); + } +#endif +} + +GNativeString & +GNativeString::format(const char fmt[], ... ) +{ + va_list args; + va_start(args, fmt); + return init(GStringRep::Native::create(fmt,args)); +} + +// Gather the native implementations here. Not used in WinCE. + +GStringRep::Native::Native(void) {} +GStringRep::Native::~Native() {} + +GP +GStringRep::Native::append(const GP &s2) const +{ + GP retval; + if(s2) + { + if(s2->isUTF8()) + { + G_THROW( ERR_MSG("GStringRep.appendUTF8toNative") ); + } + retval=concat(data,s2->data); + }else + { + retval=const_cast(this); + } + return retval; +} + +GP +GStringRep::Native::create_format(const char fmt[],...) +{ + va_list args; + va_start(args, fmt); + return create(fmt,args); +} + +unsigned char * +GStringRep::Native::UCS4toString( + const unsigned long w0,unsigned char *ptr, mbstate_t *ps) const +{ + return UCS4toNative(w0,ptr,ps); +} + +// Convert a UCS4 to a multibyte string in the value bytes. +// The data pointed to by ptr should be long enough to contain +// the results with a nill termination. (Normally 7 characters +// is enough.) +unsigned char * +GStringRep::UCS4toNative( + const unsigned long w0,unsigned char *ptr, mbstate_t *ps) +{ + unsigned short w1; + unsigned short w2=1; + for(int count=(sizeof(wchar_t)==sizeof(w1)) ? 
UCS4toUTF16(w0,w1,w2) : 1; + count; + --count,w1=w2) + { + // wchar_t can be either UCS4 or UCS2 + const wchar_t w=(sizeof(wchar_t) == sizeof(w1))?(wchar_t)w1:(wchar_t)w0; + int i=wcrtomb((char *)ptr,w,ps); + if(i<0) + { + break; + } + ptr[i]=0; + ptr += i; + } + ptr[0]=0; + return ptr; +} + +GP +GStringRep::Native::toNative(const EscapeMode escape) const +{ + if(escape == UNKNOWN_ESCAPED) + G_THROW( ERR_MSG("GStringRep.NativeToNative") ); + return const_cast(this); +} + +GP +GStringRep::Native::toUTF8(const bool) const +{ + unsigned char *buf; + GPBuffer gbuf(buf,size*6+1); + buf[0]=0; + if(data && size) + { + size_t n=size; + const char *source=data; + mbstate_t ps; + unsigned char *ptr=buf; + //(void)mbrlen(source, n, &ps); + memset(&ps,0,sizeof(mbstate_t)); + int i=0; + if(sizeof(wchar_t) == sizeof(unsigned long)) + { + wchar_t w = 0; + for(;(n>0)&&((i=mbrtowc(&w,source,n,&ps))>=0); n-=i,source+=i) + { + ptr=UCS4toUTF8(w,ptr); + } + } + else + { + wchar_t w = 0; + for(;(n>0)&&((i=mbrtowc(&w,source,n,&ps))>=0);n-=i,source+=i) + { + unsigned short s[2]; + s[0]=w; + unsigned long w0; + if(UTF16toUCS4(w0,s,s+1)<=0) + { + source+=i; + n-=i; + if((n>0)&&((i=mbrtowc(&w,source,n,&ps))>=0)) + { + s[1]=w; + if(UTF16toUCS4(w0,s,s+2)<=0) + { + i=(-1); + break; + } + } + else + { + i=(-1); + break; + } + } + ptr=UCS4toUTF8(w0,ptr); + } + } + if(i<0) + { + gbuf.resize(0); + } + else + { + ptr[0]=0; + } + } + return GStringRep::UTF8::create((const char *)buf); +} + +GNativeString +GBaseString::UTF8ToNative( + const bool currentlocale,const EscapeMode escape) const +{ + const char *source=(*this); + GP retval; + if(source && source[0]) + { +#if DO_CHANGELOCALE + GUTF8String lc_ctype(setlocale(LC_CTYPE,0)); +#endif + bool repeat; + for(repeat=!currentlocale;;repeat=false) + { + retval=(*this)->toNative((GStringRep::EscapeMode)escape); +#if DO_CHANGELOCALE + if (!repeat || retval || (lc_ctype == setlocale(LC_CTYPE,""))) +#endif + break; + } +#if DO_CHANGELOCALE + if(!repeat) + { 
+ setlocale(LC_CTYPE,(const char *)lc_ctype); + } +#endif + } + return GNativeString(retval); +} + +/*MBCS*/ +GNativeString +GBaseString::getUTF82Native( EscapeMode escape ) const +{ //MBCS cvt + GNativeString retval; + + // We don't want to convert this if it + // already is known to be native... +// if (isNative()) return *this; + + const size_t slen=length()+1; + if(slen>1) + { + retval=UTF8ToNative(false,escape) ; + if(!retval.length()) + { + retval=(const char*)*this; + } + } + return retval; +} + +GUTF8String +GBaseString::NativeToUTF8(void) const +{ + GP retval; + if(length()) + { + const char *source=(*this); +#if DO_CHANGELOCALE + GUTF8String lc_ctype=setlocale(LC_CTYPE,0); +#endif + bool repeat; + for(repeat=true;;repeat=false) + { + if( (retval=GStringRep::NativeToUTF8(source)) ) + { + if(GStringRep::cmp(retval->toNative(),source)) + { + retval=GStringRep::UTF8::create((unsigned int)0); + } + } +#if DO_CHANGELOCALE + if(!repeat || retval || (lc_ctype == setlocale(LC_CTYPE,""))) +#endif + break; + } +#if DO_CHANGELOCALE + if(!repeat) + { + setlocale(LC_CTYPE,(const char *)lc_ctype); + } +#endif + } + return GUTF8String(retval); +} + +GUTF8String +GBaseString::getNative2UTF8(void) const +{ //MBCS cvt + + // We don't want to do a transform this + // if we already are in the given type. 
+// if (isUTF8()) return *this; + + const size_t slen=length()+1; + GUTF8String retval; + if(slen > 1) + { + retval=NativeToUTF8(); + if(!retval.length()) + { + retval=(const char *)(*this); + } + } + return retval; +} /*MBCS*/ + +int +GStringRep::Native::cmp(const GP &s2,const int len) const +{ + int retval; + if(s2) + { + if(s2->isUTF8()) + { + const GP r(toUTF8(true)); + if(r) + { + retval=GStringRep::cmp(r->data,s2->data,len); + }else + { + retval=cmp(s2->toNative(NOT_ESCAPED),len); + } + }else + { + retval=GStringRep::cmp(data,s2->data,len); + } + }else + { + retval=GStringRep::cmp(data,0,len); + } + return retval; +} + +int +GStringRep::Native::toInt() const +{ + return atoi(data); +} + +long +GStringRep::Native::toLong( + const int pos, int &endpos, const int base) const +{ + char *edata=0; + const long retval=strtol(data+pos, &edata, base); + if(edata) + { + endpos=(int)((size_t)edata-(size_t)data); + }else + { + endpos=(-1); + } + return retval; +} + +unsigned long +GStringRep::Native::toULong( + const int pos, int &endpos, const int base) const +{ + char *edata=0; + const unsigned long retval=strtoul(data+pos, &edata, base); + if(edata) + { + endpos=(int)((size_t)edata-(size_t)data); + }else + { + endpos=(-1); + } + return retval; +} + +double +GStringRep::Native::toDouble( + const int pos, int &endpos) const +{ + char *edata=0; + const double retval=strtod(data+pos, &edata); + if(edata) + { + endpos=(int)((size_t)edata-(size_t)data); + }else + { + endpos=(-1); + } + return retval; +} + +unsigned long +GStringRep::Native::getValidUCS4(const char *&source) const +{ + unsigned long retval=0; + int n=(int)((size_t)size+(size_t)data-(size_t)source); + if(source && (n > 0)) + { + mbstate_t ps; + //(void)mbrlen(source, n, &ps); + memset(&ps,0,sizeof(mbstate_t)); + wchar_t wt; + const int len=mbrtowc(&wt,source,n,&ps); + if(len>=0) + { + if(sizeof(wchar_t) == sizeof(unsigned short)) + { + source+=len; + unsigned short s[2]; + s[0]=(unsigned short)wt; + 
if(UTF16toUCS4(retval,s,s+1)<=0) + { + if((n-=len)>0) + { + const int len=mbrtowc(&wt,source,n,&ps); + if(len>=0) + { + s[1]=(unsigned short)wt; + unsigned long w; + if(UTF16toUCS4(w,s,s+2)>0) + { + source+=len; + retval=w; + } + } + } + } + }else + { + retval=(unsigned long)wt; + source++; + } + }else + { + source++; + } + } + return retval; +} + +// Tests if a string is legally encoded in the current character set. +bool +GStringRep::Native::is_valid(void) const +{ + bool retval=true; + if(data && size) + { + size_t n=size; + const char *s=data; + mbstate_t ps; + //(void)mbrlen(s, n, &ps); + memset(&ps,0,sizeof(mbstate_t)); + do + { + size_t m=mbrlen(s,n,&ps); + if(m > n) + { + retval=false; + break; + }else if(m) + { + s+=m; + n-=m; + }else + { + break; + } + } while(n); + } + return retval; +} + +// These are dummy functions. +void +GStringRep::set_remainder(void const * const, const unsigned int, + const EncodeType) {} +void +GStringRep::set_remainder(void const * const, const unsigned int, + const GP &encoding) {} +void +GStringRep::set_remainder( const GP &) {} + +GP +GStringRep::get_remainder( void ) const +{ + return 0; +} + +GNativeString::GNativeString(const char dat) +{ + init(GStringRep::Native::create(&dat,0,1)); +} + +GNativeString::GNativeString(const char *str) +{ + init(GStringRep::Native::create(str)); +} + +GNativeString::GNativeString(const unsigned char *str) +{ + init(GStringRep::Native::create((const char *)str)); +} + +GNativeString::GNativeString(const unsigned short *str) +{ + init(GStringRep::Native::create(str,0,-1)); +} + +GNativeString::GNativeString(const unsigned long *str) +{ + init(GStringRep::Native::create(str,0,-1)); +} + +GNativeString::GNativeString(const char *dat, unsigned int len) +{ + init( + GStringRep::Native::create(dat,0,((int)len<0)?(-1):(int)len)); +} + +GNativeString::GNativeString(const unsigned short *dat, unsigned int len) +{ + init( + GStringRep::Native::create(dat,0,((int)len<0)?(-1):(int)len)); +} + 
+GNativeString::GNativeString(const unsigned long *dat, unsigned int len) +{ + init( + GStringRep::Native::create(dat,0,((int)len<0)?(-1):(int)len)); +} + +GNativeString::GNativeString(const GNativeString &str) +{ + init(str); +} + +GNativeString::GNativeString(const GBaseString &gs, int from, int len) +{ + init( + GStringRep::Native::create(gs,from,((int)len<0)?(-1):(int)len)); +} + +GNativeString::GNativeString(const int number) +{ + init(GStringRep::Native::create_format("%d",number)); +} + +GNativeString::GNativeString(const double number) +{ + init(GStringRep::Native::create_format("%f",number)); +} + +GNativeString& +GNativeString::operator= (const char str) +{ return init(GStringRep::Native::create(&str,0,1)); } + +GNativeString& +GNativeString::operator= (const char *str) +{ return init(GStringRep::Native::create(str)); } + +GNativeString +GBaseString::operator+(const GNativeString &s2) const +{ + return GStringRep::Native::create(*this,s2); +} + +GP +GStringRep::NativeToUTF8( const char *s ) +{ + return GStringRep::Native::create(s)->toUTF8(); +} + +#endif // HAS_WCHAR + +template +GP +GStringRep::create(const unsigned int sz, TYPE *) +{ + GP gaddr; + if (sz > 0) + { + GStringRep *addr; + gaddr=(addr=new TYPE); + addr->data=(char *)(::operator new(sz+1)); + addr->size = sz; + addr->data[sz] = 0; + } + return gaddr; +} + +GP +GStringRep::strdup(const char *s) const +{ + GP retval; + const int length=s?strlen(s):0; + if(length>0) + { + retval=blank(length); + char const * const end=s+length; + char *ptr=retval->data; + for(;*s&&(s!=end);ptr++) + { + ptr[0]=s++[0]; + } + ptr[0]=0; + } + return retval; +} + +GP +GStringRep::substr(const char *s,const int start,const int len) const +{ + GP retval; + if(s && s[0]) + { + const unsigned int length=(start<0 || len<0)?(unsigned int)strlen(s):(unsigned int)(-1); + const char *startptr, *endptr; + if(start<0) + { + startptr=s+length+start; + if(startptrstartptr) + { + retval=blank((size_t)(endptr-startptr)); + char 
*data=retval->data; + for(; (startptr +GStringRep::substr(const unsigned short *s,const int start,const int len) const +{ + GP retval; + if(s && s[0]) + { + unsigned short const *eptr; + if(len<0) + { + for(eptr=s;eptr[0];++eptr) + EMPTY_LOOP; + }else + { + eptr=&(s[len]); + } + s=&s[start]; + if((size_t)s<(size_t)eptr) + { + mbstate_t ps; + memset(&ps,0,sizeof(mbstate_t)); + unsigned char *buf,*ptr; + GPBuffer gbuf(buf,(((size_t)eptr-(size_t)s)/2)*3+7); + for(ptr=buf;s[0];) + { + unsigned long w; + int i=UTF16toUCS4(w,s,eptr); + if(i<=0) + break; + s+=i; + ptr=UCS4toString(w,ptr,&ps); + } + ptr[0]=0; + retval = strdup( (const char *)buf ); + } + } + return retval; +} + +GP +GStringRep::substr(const unsigned long *s,const int start,const int len) const +{ + GP retval; + if(s && s[0]) + { + unsigned long const *eptr; + if(len<0) + { + for(eptr=s;eptr[0];++eptr) + EMPTY_LOOP; + }else + { + eptr=&(s[len]); + } + s=&s[start]; + if((size_t)s<(size_t)eptr) + { + mbstate_t ps; + memset(&ps,0,sizeof(mbstate_t)); + unsigned char *buf,*ptr; + GPBuffer gbuf(buf,((((size_t)eptr-(size_t)s))/4)*6+7); + for(ptr=buf;s[0];++s) + { + ptr=UCS4toString(s[0],ptr,&ps); + } + ptr[0]=0; + retval = strdup( (const char *)buf ); + } + } + return retval; +} + +GP +GStringRep::append(const char *s2) const +{ + GP retval; + if(s2) + { + retval=concat(data,s2); + }else + { + retval=const_cast(this); + } + return retval; +} + +GP +GStringRep::UTF8::append(const GP &s2) const +{ + GP retval; + if(s2) + { + if(s2->isNative()) + { + G_THROW( ERR_MSG("GStringRep.appendNativeToUTF8") ); + } + retval=concat(data,s2->data); + }else + { + retval=const_cast(this); + } + return retval; +} + +GP +GStringRep::concat(const char *s1,const char *s2) const +{ + const int length1=(s1?strlen(s1):0); + const int length2=(s2?strlen(s2):0); + const int length=length1+length2; + GP retval; + if(length>0) + { + retval=blank(length); + GStringRep &r=*retval; + if(length1) + { + strcpy(r.data,s1); + if(length2) + 
strcat(r.data,s2); + }else + { + strcpy(r.data,s2); + } + } + return retval; +} + +const char *GBaseString::nullstr = ""; + +void +GBaseString::empty( void ) +{ + init(0); +} + +GP +GStringRep::getbuf(int n) const +{ + GP retval; + if(n< 0) + n=strlen(data); + if(n>0) + { + retval=blank(n); + char *ndata=retval->data; + strncpy(ndata,data,n); + ndata[n]=0; + } + return retval; +} + +const char * +GStringRep::isCharType( + bool (*xiswtest)(const unsigned long wc), const char *ptr, const bool reverse) const +{ + char const * xptr=ptr; + const unsigned long w=getValidUCS4(xptr); + if((ptr != xptr) + &&(((sizeof(wchar_t) == 2)&&(w&~0xffff)) + ||(reverse?(!xiswtest(w)):xiswtest(w)))) + { + ptr=xptr; + } + return ptr; +} + +int +GStringRep::nextCharType( + bool (*xiswtest)(const unsigned long wc), const int from, const int len, + const bool reverse) const +{ + // We want to return the position of the next + // non white space starting from the #from# + // location. isspace should work in any locale + // so we should only need to do this for the non- + // native locales (UTF8) + int retval; + if(from 0xff) + _w = (unsigned char)w; + + return + (w&~0xff)?(true):(((unsigned long)isspace(_w))||((w == '\r')||(w == '\n'))); +#endif +} + +bool +GStringRep::giswupper(const unsigned long w) +{ +#if HAS_WCTYPE + return ((sizeof(wchar_t) == 2)&&(w&~0xffff)) + ?(true):((unsigned long)iswupper((wchar_t)w)?true:false); +#else + return (w&~0xff)?(true):((unsigned long)isupper((char)w)?true:false); +#endif +} + +bool +GStringRep::giswlower(const unsigned long w) +{ +#if HAS_WCTYPE + return ((sizeof(wchar_t) == 2)&&(w&~0xffff)) + ?(true):((unsigned long)iswlower((wchar_t)w)?true:false); +#else + return (w&~0xff)?(true):((unsigned long)islower((char)w)?true:false); +#endif +} + +unsigned long +GStringRep::gtowupper(const unsigned long w) +{ +#if HAS_WCTYPE + return ((sizeof(wchar_t) == 2)&&(w&~0xffff)) + ?w:((unsigned long)towupper((wchar_t)w)); +#else + return (w&~0xff)?w:((unsigned 
long)toupper((char)w)); +#endif +} + +unsigned long +GStringRep::gtowlower(const unsigned long w) +{ +#if HAS_WCTYPE + return ((sizeof(wchar_t) == 2)&&(w&~0xffff)) + ?w:((unsigned long)towlower((wchar_t)w)); +#else + return (w&~0xff)?w:((unsigned long)tolower((char)w)); +#endif +} + +GP +GStringRep::tocase( + bool (*xiswcase)(const unsigned long wc), + unsigned long (*xtowcase)(const unsigned long wc)) const +{ + GP retval; + char const * const eptr=data+size; + char const *ptr=data; + while(ptr gbuf(buf,n+(1+size-n)*6); + if(n>0) + { + strncpy((char *)buf,data,n); + } + unsigned char *buf_ptr=buf+n; + for(char const *ptr=data+n;ptr(this); + } + return retval; +} + +// Returns a copy of this string with characters used in XML escaped as follows: +// '<' --> "<" +// '>' --> ">" +// '&' --> "&" +// '\'' --> "'" +// '\"' --> """ +// Also escapes characters 0x00 through 0x1f and 0x7e through 0x7f. +GP +GStringRep::toEscaped( const bool tosevenbit ) const +{ + bool modified=false; + char *ret; + GPBuffer gret(ret,size*7); + ret[0]=0; + char *retptr=ret; + char const *start=data; + char const *s=start; + char const *last=s; + GP special; + for(unsigned long w;(w=getValidUCS4(s));last=s) + { + char const *ss=0; + switch(w) + { + case '<': + ss="<"; + break; + case '>': + ss=">"; + break; + case '&': + ss="&"; + break; + case '\47': + ss="'"; + break; + case '\42': + ss="""; + break; + default: + if((w<' ')||(w>=0x7e && (tosevenbit || (w < 0x80)))) + { + special=toThis(UTF8::create_format("&#%lu;",w)); + ss=special->data; + } + break; + } + if(ss) + { + modified=true; + if(s!=start) + { + size_t len=(size_t)last-(size_t)start; + strncpy(retptr,start,len); + retptr+=len; + start=s; + } + if(ss[0]) + { + size_t len=strlen(ss); + strcpy(retptr,ss); + retptr+=len; + } + } + } + GP retval; + if(modified) + { + strcpy(retptr,start); + retval=strdup( ret ); + }else + { + retval=const_cast(this); + } +// DEBUG_MSG( "Escaped string is '" << ret << "'\n" ); + return retval; +} + + 
+static const GMap & +BasicMap( void ) +{ + static GMap Basic; + if (! Basic.size()) + { + Basic["lt"] = GUTF8String('<'); + Basic["gt"] = GUTF8String('>'); + Basic["amp"] = GUTF8String('&'); + Basic["apos"] = GUTF8String('\47'); + Basic["quot"] = GUTF8String('\42'); + } + return Basic; +} + +GUTF8String +GUTF8String::fromEscaped( const GMap ConvMap ) const +{ + GUTF8String ret; // Build output string here + int start_locn = 0; // Beginning of substring to skip + int amp_locn; // Location of a found ampersand + + while( (amp_locn = search( '&', start_locn )) > -1 ) + { + // Found the next apostrophe + // Locate the closing semicolon + const int semi_locn = search( ';', amp_locn ); + // No closing semicolon, exit and copy + // the rest of the string. + if( semi_locn < 0 ) + break; + ret += substr( start_locn, amp_locn - start_locn ); + int const len = semi_locn - amp_locn - 1; + if(len) + { + GUTF8String key = substr( amp_locn+1, len); + //DEBUG_MSG( "key = '" << key << "'\n" ); + char const * s=key; + if( s[0] == '#') + { + unsigned long value; + char *ptr=0; + if(s[1] == 'x' || s[1] == 'X') + { + value=strtoul((char const *)(s+2),&ptr,16); + }else + { + value=strtoul((char const *)(s+1),&ptr,10); + } + if(ptr) + { + unsigned char utf8char[7]; + unsigned char const * const end=GStringRep::UCS4toUTF8(value,utf8char); + ret+=GUTF8String((char const *)utf8char,(size_t)end-(size_t)utf8char); + }else + { + ret += substr( amp_locn, semi_locn - amp_locn + 1 ); + } + }else + { + GPosition map_entry = ConvMap.contains( key ); + if( map_entry ) + { // Found in the conversion map, substitute + ret += ConvMap[map_entry]; + } else + { + static const GMap &Basic = BasicMap(); + GPosition map_entry = Basic.contains( key ); + if ( map_entry ) + { + ret += Basic[map_entry]; + }else + { + ret += substr( amp_locn, len+2 ); + } + } + } + }else + { + ret += substr( amp_locn, len+2 ); + } + start_locn = semi_locn + 1; +// DEBUG_MSG( "ret = '" << ret << "'\n" ); + } + + // Copy the end 
of the string to the output + ret += substr( start_locn, length()-start_locn ); + +// DEBUG_MSG( "Unescaped string is '" << ret << "'\n" ); + return (ret == *this)?(*this):ret; +} + +GUTF8String +GUTF8String::fromEscaped(void) const +{ + const GMap nill; + return fromEscaped(nill); +} + +GP +GStringRep::setat(int n, char ch) const +{ + GP retval; + if(n<0) + n+=size; + if (n < 0 || n>size) + GBaseString::throw_illegal_subscript(); + if(ch == data[n]) + { + retval=const_cast(this); + }else if(!ch) + { + retval=getbuf(n); + }else + { + retval=getbuf((ndata[n]=ch; + if(n == size) + retval->data[n+1]=0; + } + return retval; +} + +#ifndef WASM_MODE +#if defined(AUTOCONF) && defined(HAVE_VSNPRINTF) +# define USE_VSNPRINTF vsnprintf +#elif defined(WIN32) && !defined(__CYGWIN32__) +# define USE_VSNPRINTF _vsnprintf +#elif defined(linux) +# define USE_VSNPRINTF vsnprintf +#endif +#endif + +GUTF8String & +GUTF8String::format(const char fmt[], ... ) +{ + va_list args; + va_start(args, fmt); + return init(GStringRep::UTF8::create(fmt,args)); +} + +GP +GStringRep::UTF8::create_format(const char fmt[],...) +{ + va_list args; + va_start(args, fmt); + return create(fmt,args); +} + +GP +GStringRep::vformat(va_list args) const +{ + GP retval; + if(size) + { + char const * const fmt=data; + int buflen=32768; + char *buffer; + GPBuffer gbuffer(buffer,buflen); + ChangeLocale locale(LC_NUMERIC,(isNative()?0:"C")); + // Format string +#ifdef USE_VSNPRINTF + while(USE_VSNPRINTF(buffer, buflen, fmt, args)<0) + { + gbuffer.resize(0); + gbuffer.resize(buflen+32768); + } + va_end(args); +#else + buffer[buflen-1] = 0; + vsprintf(buffer, fmt, args); + va_end(args); + if (buffer[buflen-1]) + { + // This isn't as fatal since it is on the stack, but we + // definitely should stop the current operation. 
+ G_THROW( ERR_MSG("GString.overwrite") ); + } +#endif + retval=strdup((const char *)buffer); + } + // Go altering the string + return retval; +} + +int +GStringRep::search(char c, int from) const +{ + if (from<0) + from += size; + int retval=(-1); + if (from>=0 && from=0 && from=0) && (from= 0;++loc) + retval=loc; + return retval; +} + +int +GStringRep::contains(const char accept[],int from) const +{ + if(from<0) + { + from+=size; + if(from<0) + G_THROW( ERR_MSG("GString.bad_subscript") ); + } + int retval=(-1); + if (accept && accept[0] && from>=0 && from= 0) + { + retval=from++; + } + return retval; +} + +bool +GBaseString::is_int(void) const +{ + bool isLong=!!ptr; + if(isLong) + { + int endpos; + (*this)->toLong(0,endpos); + if(endpos>=0) + { + isLong=((*this)->nextNonSpace(endpos) == (int)length()); + } + } + return isLong; +} + +bool +GBaseString::is_float(void) const +{ + bool isDouble=!!ptr; + if(isDouble) + { + int endpos; + (*this)->toDouble(0,endpos); + if(endpos>=0) + { + isDouble=((*this)->nextNonSpace(endpos) == (int)length()); + } + } + return isDouble; +} + +unsigned int +hash(const GBaseString &str) +{ + unsigned int x = 0; + const char *s = (const char*)str; + while (*s) + x = x ^ (x<<6) ^ (unsigned char)(*s++); + return x; +} + +void +GBaseString::throw_illegal_subscript() +{ + G_THROW( ERR_MSG("GString.bad_subscript") ); +} + +unsigned char * +GStringRep::UTF8::UCS4toString( + const unsigned long w0,unsigned char *ptr, mbstate_t *) const +{ + return UCS4toUTF8(w0,ptr); +} + +int +GStringRep::UTF8::ncopy( + wchar_t * const buf, const int buflen ) const +{ + int retval=(-1); + if(buf && buflen) + { + buf[0]=0; + if(data[0]) + { + const size_t length=strlen(data); + const unsigned char * const eptr=(const unsigned char *)(data+length); + wchar_t *r=buf; + wchar_t const * const rend=buf+buflen; + for(const unsigned char *s=(const unsigned char *)data;(r +GStringRep::UTF8::toNative(const EscapeMode escape) const +{ + GP retval; + if(data[0]) + { + 
const size_t length=strlen(data); + const unsigned char * const eptr=(const unsigned char *)(data+length); + unsigned char *buf; + GPBuffer gbuf(buf,12*length+12); + unsigned char *r=buf; + mbstate_t ps; + memset(&ps,0,sizeof(mbstate_t)); + for(const unsigned char *s=(const unsigned char *)data;(s +GStringRep::UTF8::toUTF8(const bool nothrow) const +{ + if(!nothrow) + G_THROW( ERR_MSG("GStringRep.UTF8ToUTF8") ); + return const_cast(this); +} + +// Tests if a string is legally encoded in the current character set. +bool +GStringRep::UTF8::is_valid(void) const +{ + bool retval=true; + if(data && size) + { + const unsigned char * const eptr=(const unsigned char *)(data+size); + for(const unsigned char *s=(const unsigned char *)data;(s>6)|0xC0); + *ptr++ = (unsigned char)((w|0x80)&0xBF); + } + else if(w <= 0xFFFF) + { + *ptr++ = (unsigned char)((w>>12)|0xE0); + *ptr++ = (unsigned char)(((w>>6)|0x80)&0xBF); + *ptr++ = (unsigned char)((w|0x80)&0xBF); + } + else if(w <= 0x1FFFFF) + { + *ptr++ = (unsigned char)((w>>18)|0xF0); + *ptr++ = (unsigned char)(((w>>12)|0x80)&0xBF); + *ptr++ = (unsigned char)(((w>>6)|0x80)&0xBF); + *ptr++ = (unsigned char)((w|0x80)&0xBF); + } + else if(w <= 0x3FFFFFF) + { + *ptr++ = (unsigned char)((w>>24)|0xF8); + *ptr++ = (unsigned char)(((w>>18)|0x80)&0xBF); + *ptr++ = (unsigned char)(((w>>12)|0x80)&0xBF); + *ptr++ = (unsigned char)(((w>>6)|0x80)&0xBF); + *ptr++ = (unsigned char)((w|0x80)&0xBF); + } + else if(w <= 0x7FFFFFFF) + { + *ptr++ = (unsigned char)((w>>30)|0xFC); + *ptr++ = (unsigned char)(((w>>24)|0x80)&0xBF); + *ptr++ = (unsigned char)(((w>>18)|0x80)&0xBF); + *ptr++ = (unsigned char)(((w>>12)|0x80)&0xBF); + *ptr++ = (unsigned char)(((w>>6)|0x80)&0xBF); + *ptr++ = (unsigned char)((w|0x80)&0xBF); + } + else + { + *ptr++ = '?'; + } + return ptr; +} + + // Creates with a concat operation. 
+GP +GStringRep::concat( const char *s1, const GP &s2) const +{ + GP retval; + if(s2) + { + retval=toThis(s2); + if(s1 && s1[0]) + { + if(retval) + { + retval=concat(s1,retval->data); + }else + { + retval=strdup(s1); + } + } + }else if(s1 && s1[0]) + { + retval=strdup(s1); + } + return retval; +} + + // Creates with a concat operation. + +GP +GStringRep::concat( const GP &s1,const char *s2) const +{ + GP retval; + if(s1) + { + retval=toThis(s1); + if(s2 && s2[0]) + { + if(retval) + { + retval=retval->append(s2); + }else + { + retval=strdup(s2); + } + } + }else if(s2 && s2[0]) + { + retval=strdup(s2); + } + return retval; +} + +GP +GStringRep::concat(const GP &s1,const GP &s2) const +{ + GP retval; + if(s1) + { + retval=toThis(s1,s2); + if(retval && s2) + { + retval=retval->append(toThis(s2)); + } + }else if(s2) + { + retval=toThis(s2); + } + return retval; +} + +//< Changed for WinDjView project +//#ifdef WIN32 +#if 0 +//> +static const char *setlocale_win32(void) +{ + static const char *locale=setlocale(LC_ALL,0); + if(! 
locale || (locale[0] == 'C' && !locale[1])) + { + locale=setlocale(LC_ALL,""); + } + return locale; +} +const char *setlocale_win32_var = setlocale_win32(); +#endif + +GStringRep::GStringRep(void) +{ + size=0; + data=0; +} + +GStringRep::~GStringRep() +{ + if(data) + { + data[0]=0; + ::operator delete(data); + } + data=0; +} + +GStringRep::UTF8::UTF8(void) {} + +GStringRep::UTF8::~UTF8() {} + +int +GStringRep::cmp(const char *s1,const int len) const +{ + return cmp(data,s1,len); +} + +int +GStringRep::cmp(const char *s1, const char *s2,const int len) +{ + return (len + ?((s1&&s1[0]) + ?((s2&&s2[0]) + ?((len>0) + ?strncmp(s1,s2,len) + :strcmp(s1,s2)) + :1) + :((s2&&s2[0])?(-1):0)) + :0); +} + +int +GStringRep::cmp(const GP &s1, const GP &s2, + const int len ) +{ + return (s1?(s1->cmp(s2,len)):cmp(0,(s2?(s2->data):0),len)); +} + +int +GStringRep::cmp(const GP &s1, const char *s2, + const int len ) +{ + return cmp((s1?s1->data:0),s2,len); +} + +int +GStringRep::cmp(const char *s1, const GP &s2, + const int len ) +{ + return cmp(s1,(s2?(s2->data):0),len); +} + +int +GStringRep::UTF8::cmp(const GP &s2,const int len) const +{ + int retval; + if(s2) + { + if(s2->isNative()) + { + GP r(s2->toUTF8(true)); + if(r) + { + retval=GStringRep::cmp(data,r->data,len); + }else + { + retval=-(s2->cmp(toNative(NOT_ESCAPED),len)); + } + }else + { + retval=GStringRep::cmp(data,s2->data,len); + } + }else + { + retval=GStringRep::cmp(data,0,len); + } + return retval; +} + +int +GStringRep::UTF8::toInt() const +{ + int endpos; + return (int)toLong(0,endpos); +} + +static inline long +Cstrtol(char *data,char **edata, const int base) +{ + GStringRep::ChangeLocale locale(LC_NUMERIC,"C"); + while (data && *data==' ') data++; + return strtol(data,edata,base); +} + +long +GStringRep::UTF8::toLong( + const int pos, int &endpos, const int base) const +{ + char *edata=0; + long retval=Cstrtol(data+pos,&edata, base); + if(edata) + { + endpos=edata-data; + }else + { + endpos=(-1); + GP 
ptr=ptr->strdup(data+pos); + if(ptr) + ptr=ptr->toNative(NOT_ESCAPED); + if(ptr) + { + int xendpos; + retval=ptr->toLong(0,xendpos,base); + if(xendpos> 0) + { + endpos=(int)size; + ptr=ptr->strdup(data+xendpos); + if(ptr) + { + ptr=ptr->toUTF8(true); + if(ptr) + { + endpos-=(int)(ptr->size); + } + } + } + } + } + return retval; +} + +static inline unsigned long +Cstrtoul(char *data,char **edata, const int base) +{ + GStringRep::ChangeLocale locale(LC_NUMERIC,"C"); + while (data && *data==' ') data++; + return strtoul(data,edata,base); +} + +unsigned long +GStringRep::UTF8::toULong( + const int pos, int &endpos, const int base) const +{ + char *edata=0; + unsigned long retval=Cstrtoul(data+pos,&edata, base); + if(edata) + { + endpos=edata-data; + }else + { + endpos=(-1); + GP ptr=ptr->strdup(data+pos); + if(ptr) + ptr=ptr->toNative(NOT_ESCAPED); + if(ptr) + { + int xendpos; + retval=ptr->toULong(0,xendpos,base); + if(xendpos> 0) + { + endpos=(int)size; + ptr=ptr->strdup(data+xendpos); + if(ptr) + { + ptr=ptr->toUTF8(true); + if(ptr) + { + endpos-=(int)(ptr->size); + } + } + } + } + } + return retval; +} + +static inline double +Cstrtod(char *data,char **edata) +{ + GStringRep::ChangeLocale locale(LC_NUMERIC,"C"); + while (data && *data==' ') data++; + return strtod(data,edata); +} + +double +GStringRep::UTF8::toDouble(const int pos, int &endpos) const +{ + char *edata=0; + double retval=Cstrtod(data+pos,&edata); + if(edata) + { + endpos=edata-data; + }else + { + endpos=(-1); + GP ptr=ptr->strdup(data+pos); + if(ptr) + ptr=ptr->toNative(NOT_ESCAPED); + if(ptr) + { + int xendpos; + retval=ptr->toDouble(0,xendpos); + if(xendpos >= 0) + { + endpos=(int)size; + ptr=ptr->strdup(data+xendpos); + if(ptr) + { + ptr=ptr->toUTF8(true); + if(ptr) + { + endpos-=(int)(ptr->size); + } + } + } + } + } + return retval; +} + +int +GStringRep::getUCS4(unsigned long &w, const int from) const +{ + int retval; + if(from>=size) + { + w=0; + retval=size; + }else if(from<0) + { + 
w=(unsigned int)(-1); + retval=(-1); + }else + { + const char *source=data+from; + w=getValidUCS4(source); + retval=(int)((size_t)source-(size_t)data); + } + return retval; +} + + +unsigned long +GStringRep::UTF8::getValidUCS4(const char *&source) const +{ + return GStringRep::UTF8toUCS4((const unsigned char *&)source,data+size); +} + +int +GStringRep::nextNonSpace(const int from,const int len) const +{ + return nextCharType(giswspace,from,len,true); +} + +int +GStringRep::nextSpace(const int from,const int len) const +{ + return nextCharType(giswspace,from,len,false); +} + +int +GStringRep::nextChar(const int from) const +{ + char const * xptr=data+from; + (void)getValidUCS4(xptr); + return (int)((size_t)xptr-(size_t)data); +} + +int +GStringRep::firstEndSpace(int from,const int len) const +{ + const int xsize=(len<0)?size:(from+len); + const int ysize=(size>10)&0x3ff)+0xD800); + w2=(unsigned short)((w&0x3ff)+0xDC00); + retval=2; + } + return retval; +} + +int +GStringRep::UTF16toUCS4( + unsigned long &U,unsigned short const * const s,void const * const eptr) +{ + int retval=0; + U=0; + unsigned short const * const r=s+1; + if(r <= eptr) + { + unsigned long const W1=s[0]; + if((W1<0xD800)||(W1>0xDFFF)) + { + if((U=W1)) + { + retval=1; + } + }else if(W1<=0xDBFF) + { + unsigned short const * const rr=r+1; + if(rr <= eptr) + { + unsigned long const W2=s[1]; + if(((W2>=0xDC00)||(W2<=0xDFFF))&&((U=(0x10000+((W1&0x3ff)<<10))|(W2&0x3ff)))) + { + retval=2; + }else + { + retval=(-1); + } + } + } + } + return retval; +} + + +//bcr + +GUTF8String& +GUTF8String::operator+= (char ch) +{ + return init( + GStringRep::UTF8::create((const char*)*this, + GStringRep::UTF8::create(&ch,0,1))); +} + +GUTF8String& +GUTF8String::operator+= (const char *str) +{ + return init(GStringRep::UTF8::create(*this,str)); +} + +GUTF8String& +GUTF8String::operator+= (const GBaseString &str) +{ + return init(GStringRep::UTF8::create(*this,str)); +} + +GUTF8String +GUTF8String::substr(int from, int 
len) const +{ return GUTF8String(*this, from, len); } + +GUTF8String +GUTF8String::operator+(const GBaseString &s2) const +{ return GStringRep::UTF8::create(*this,s2); } + +GUTF8String +GUTF8String::operator+(const GUTF8String &s2) const +{ return GStringRep::UTF8::create(*this,s2); } + +GUTF8String +GUTF8String::operator+(const char *s2) const +{ return GStringRep::UTF8::create(*this,s2); } + +char * +GUTF8String::getbuf(int n) +{ + if(ptr) + init((*this)->getbuf(n)); + else if(n>0) + init(GStringRep::UTF8::create(n)); + else + init(0); + return ptr?((*this)->data):0; +} + +void +GUTF8String::setat(const int n, const char ch) +{ + if((!n)&&(!ptr)) + { + init(GStringRep::UTF8::create(&ch,0,1)); + }else + { + init((*this)->setat(CheckSubscript(n),ch)); + } +} + +GP +GStringRep::UTF8ToNative( const char *s, const EscapeMode escape ) +{ + return GStringRep::UTF8::create(s)->toNative(escape); +} + +GUTF8String::GUTF8String(const char dat) +{ init(GStringRep::UTF8::create(&dat,0,1)); } + +GUTF8String::GUTF8String(const GUTF8String &fmt, va_list &args) +{ + if (fmt.ptr) + init(fmt->vformat(args)); + else + init(fmt); +} + +GUTF8String::GUTF8String(const char *str) +{ init(GStringRep::UTF8::create(str)); } + +GUTF8String::GUTF8String(const unsigned char *str) +{ init(GStringRep::UTF8::create((const char *)str)); } + +GUTF8String::GUTF8String(const unsigned short *str) +{ init(GStringRep::UTF8::create(str,0,-1)); } + +GUTF8String::GUTF8String(const unsigned long *str) +{ init(GStringRep::UTF8::create(str,0,-1)); } + +GUTF8String::GUTF8String(const char *dat, unsigned int len) +{ init(GStringRep::UTF8::create(dat,0,((int)len<0)?(-1):(int)len)); } + +GUTF8String::GUTF8String(const unsigned short *dat, unsigned int len) +{ init(GStringRep::UTF8::create(dat,0,((int)len<0)?(-1):(int)len)); } + +GUTF8String::GUTF8String(const unsigned long *dat, unsigned int len) +{ init(GStringRep::UTF8::create(dat,0,((int)len<0)?(-1):(int)len)); } + +GUTF8String::GUTF8String(const GBaseString 
&gs, int from, int len) +{ init(GStringRep::UTF8::create(gs,from,((int)len<0)?(-1):(int)len)); } + +GUTF8String::GUTF8String(const int number) +{ init(GStringRep::UTF8::create_format("%d",number)); } + +GUTF8String::GUTF8String(const double number) +{ init(GStringRep::UTF8::create_format("%f",number)); } + +GUTF8String& GUTF8String::operator= (const char str) +{ return init(GStringRep::UTF8::create(&str,0,1)); } + +GUTF8String& GUTF8String::operator= (const char *str) +{ return init(GStringRep::UTF8::create(str)); } + +GUTF8String GBaseString::operator+(const GUTF8String &s2) const +{ return GStringRep::UTF8::create(*this,s2); } + +#if HAS_WCHAR +GUTF8String +GNativeString::operator+(const GUTF8String &s2) const +{ + if (ptr) + return GStringRep::UTF8::create((*this)->toUTF8(true),s2); + else + return GStringRep::UTF8::create((*this),s2); +} +#endif + +GUTF8String +GUTF8String::operator+(const GNativeString &s2) const +{ + GP g = s2; + if (s2.ptr) + g = s2->toUTF8(true); + return GStringRep::UTF8::create(*this,g); +} + +GUTF8String +operator+(const char *s1, const GUTF8String &s2) +{ return GStringRep::UTF8::create(s1,s2); } + +#if HAS_WCHAR +GNativeString +operator+(const char *s1, const GNativeString &s2) +{ return GStringRep::Native::create(s1,s2); } + +GNativeString& +GNativeString::operator+= (char ch) +{ + char s[2]; s[0]=ch; s[1]=0; + return init(GStringRep::Native::create((const char*)*this, s)); +} + +GNativeString& +GNativeString::operator+= (const char *str) +{ + return init(GStringRep::Native::create(*this,str)); +} + +GNativeString& +GNativeString::operator+= (const GBaseString &str) +{ + return init(GStringRep::Native::create(*this,str)); +} + +GNativeString +GNativeString::operator+(const GBaseString &s2) const +{ return GStringRep::Native::create(*this,s2); } + +GNativeString +GNativeString::operator+(const GNativeString &s2) const +{ return GStringRep::Native::create(*this,s2); } + +GNativeString +GNativeString::operator+(const char *s2) const +{ 
return GStringRep::Native::create(*this,s2); } + +char * +GNativeString::getbuf(int n) +{ + if(ptr) + init((*this)->getbuf(n)); + else if(n>0) + init(GStringRep::Native::create(n)); + else + init(0); + return ptr?((*this)->data):0; +} + +void +GNativeString::setat(const int n, const char ch) +{ + if((!n)&&(!ptr)) + { + init(GStringRep::Native::create(&ch,0,1)); + }else + { + init((*this)->setat(CheckSubscript(n),ch)); + } +} + +#endif + + +#ifdef HAVE_NAMESPACES +} +# ifndef NOT_USING_DJVU_NAMESPACE +using namespace DJVU; +# endif +#endif diff --git a/DjVuFile/libdjvu/GString.h b/DjVuFile/libdjvu/DjVuGString.h similarity index 97% rename from DjVuFile/libdjvu/GString.h rename to DjVuFile/libdjvu/DjVuGString.h index 1409ba21fa..b053631fd8 100644 --- a/DjVuFile/libdjvu/GString.h +++ b/DjVuFile/libdjvu/DjVuGString.h @@ -1,1677 +1,1677 @@ -//C- -*- C++ -*- -//C- ------------------------------------------------------------------- -//C- DjVuLibre-3.5 -//C- Copyright (c) 2002 Leon Bottou and Yann Le Cun. -//C- Copyright (c) 2001 AT&T -//C- -//C- This software is subject to, and may be distributed under, the -//C- GNU General Public License, either Version 2 of the license, -//C- or (at your option) any later version. The license should have -//C- accompanied the software or you may obtain a copy of the license -//C- from the Free Software Foundation at http://www.fsf.org . -//C- -//C- This program is distributed in the hope that it will be useful, -//C- but WITHOUT ANY WARRANTY; without even the implied warranty of -//C- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -//C- GNU General Public License for more details. -//C- -//C- DjVuLibre-3.5 is derived from the DjVu(r) Reference Library from -//C- Lizardtech Software. 
Lizardtech Software has authorized us to -//C- replace the original DjVu(r) Reference Library notice by the following -//C- text (see doc/lizard2002.djvu and doc/lizardtech2007.djvu): -//C- -//C- ------------------------------------------------------------------ -//C- | DjVu (r) Reference Library (v. 3.5) -//C- | Copyright (c) 1999-2001 LizardTech, Inc. All Rights Reserved. -//C- | The DjVu Reference Library is protected by U.S. Pat. No. -//C- | 6,058,214 and patents pending. -//C- | -//C- | This software is subject to, and may be distributed under, the -//C- | GNU General Public License, either Version 2 of the license, -//C- | or (at your option) any later version. The license should have -//C- | accompanied the software or you may obtain a copy of the license -//C- | from the Free Software Foundation at http://www.fsf.org . -//C- | -//C- | The computer code originally released by LizardTech under this -//C- | license and unmodified by other parties is deemed "the LIZARDTECH -//C- | ORIGINAL CODE." Subject to any third party intellectual property -//C- | claims, LizardTech grants recipient a worldwide, royalty-free, -//C- | non-exclusive license to make, use, sell, or otherwise dispose of -//C- | the LIZARDTECH ORIGINAL CODE or of programs derived from the -//C- | LIZARDTECH ORIGINAL CODE in compliance with the terms of the GNU -//C- | General Public License. This grant only confers the right to -//C- | infringe patent claims underlying the LIZARDTECH ORIGINAL CODE to -//C- | the extent such infringement is reasonably necessary to enable -//C- | recipient to make, have made, practice, sell, or otherwise dispose -//C- | of the LIZARDTECH ORIGINAL CODE (or portions thereof) and not to -//C- | any greater extent that may be necessary to utilize further -//C- | modifications or combinations. 
-//C- | -//C- | The LIZARDTECH ORIGINAL CODE is provided "AS IS" WITHOUT WARRANTY -//C- | OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -//C- | TO ANY WARRANTY OF NON-INFRINGEMENT, OR ANY IMPLIED WARRANTY OF -//C- | MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. -//C- +------------------------------------------------------------------ -// -// $Id: GString.h,v 1.24 2008/01/27 11:36:27 leonb Exp $ -// $Name: $ - -#ifndef _GSTRING_H_ -#define _GSTRING_H_ -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif -#if NEED_GNUG_PRAGMAS -# pragma interface -#endif - -/** @name GString.h - - Files #"GString.h"# and #"GString.cpp"# implement a general - purpose string class \Ref{GBaseString}, with dirived types - \Ref{GUTF8String} and \Ref{GNativeString} for UTF8 MBS encoding - and the current Native MBS encoding respectively. This - implementation relies on smart pointers (see - \Ref{GSmartPointer.h}). - - {\bf Historical Comments} --- At some point during the DjVu - research era, it became clear that C++ compilers rarely provided - portable libraries. We then decided to avoid fancy classes (like - #iostream# or #string#) and to rely only on the good old C - library. A good string class however is very useful. We had - already randomly picked letter 'G' to prefix class names and we - logically derived the new class name. Native English speakers - kept laughing in hiding. This is ironic because we completely - forgot this letter 'G' when creating more challenging things - like the ZP Coder or the IW44 wavelets. - - {\bf Later Changes} - When converting to I18N, we (Lizardtech) decided that two string classes - where needing, replacing the original GString with \Ref{GUTF8String} and - \Ref{GNativeString}. - - @memo - General purpose string class. - @author - L\'eon Bottou -- initial implementation.\\ - -// From: Leon Bottou, 1/31/2002 -// This file has very little to do with my initial implementation. 
-// It has been practically rewritten by Lizardtech for i18n changes. -// My original implementation was very small in comparison -// . -// In my opinion, the duplication of the string classes is a failed -// attempt to use the type system to enforce coding policies. -// This could be fixed. But there are better things to do in djvulibre. - - @version - #$Id: GString.h,v 1.24 2008/01/27 11:36:27 leonb Exp $# */ -//@{ - - -#include "DjVuGlobal.h" -#include "GContainer.h" - -#include -#include -#ifdef WIN32 -# define HAS_WCHAR 1 -# define HAS_MBSTATE 1 -#endif - -#if HAS_WCHAR -# if !defined(AUTOCONF) || HAVE_WCHAR_H -# include -# endif -#endif - - -#ifdef HAVE_NAMESPACES -namespace DJVU { -# ifdef NOT_DEFINED // Just to fool emacs c++ mode -} -#endif -#endif - -#if !HAS_MBSTATE -# ifndef HAVE_MBSTATE_T -typedef int mbstate_t; -# endif -#endif - -class GBaseString; -class GUTF8String; -class GNativeString; - -// Internal string representation. -class DJVUAPI GStringRep : public GPEnabled -{ -public: - enum EncodeType { XUCS4, XUCS4BE, XUCS4LE, XUCS4_2143, XUCS4_3412, - XUTF16, XUTF16BE, XUTF16LE, XUTF8, XEBCDIC, XOTHER } ; - - enum EscapeMode { UNKNOWN_ESCAPED=0, IS_ESCAPED=1, NOT_ESCAPED=2 }; - - class UTF8; - friend class UTF8; - class Unicode; - friend class Unicode; - - class ChangeLocale; -#if HAS_WCHAR - class Native; - friend class Native; -#endif // HAS_WCHAR - friend class GBaseString; - friend class GUTF8String; - friend class GNativeString; - friend DJVUAPI unsigned int hash(const GBaseString &ref); - -public: - // default constructor - GStringRep(void); - // virtual destructor - virtual ~GStringRep(); - - // Other virtual methods. - // Create an empty string. - virtual GP blank(const unsigned int sz) const = 0; - // Create a duplicate at the given size. - GP getbuf(int n) const; - // Change the value of one of the bytes. - GP setat(int n, char ch) const; - // Append a string. - virtual GP append(const GP &s2) const = 0; - // Test if isUTF8. 
- virtual bool isUTF8(void) const { return false; } - // Test if Native. - virtual bool isNative(void) const { return false; } - // Convert to Native. - virtual GP toNative( - const EscapeMode escape=UNKNOWN_ESCAPED ) const = 0; - // Convert to UTF8. - virtual GP toUTF8(const bool nothrow=false) const = 0; - // Convert to same as current class. - virtual GP toThis( - const GP &rep,const GP &locale=0) const = 0; - // Compare with #s2#. - virtual int cmp(const GP &s2,const int len=(-1)) const = 0; - - // Convert strings to numbers. - virtual int toInt(void) const = 0; - virtual long int toLong( - const int pos, int &endpos, const int base=10) const = 0; - virtual unsigned long toULong( - const int pos, int &endpos, const int base=10) const = 0; - virtual double toDouble(const int pos, int &endpos) const = 0; - - // return the position of the next character - int nextChar( const int from=0 ) const; - - // return next non space position - int nextNonSpace( const int from=0, const int len=(-1) ) const; - - // return next white space position - int nextSpace( const int from=0, const int len=(-1) ) const; - - // return the position after the last non-whitespace character. - int firstEndSpace( int from=0, const int len=(-1) ) const; - - // Create an empty string. - template static GP create( - const unsigned int sz,TYPE *); - // Creates with a strdup string. - GP strdup(const char *s) const; - - // Creates by appending to the current string - GP append(const char *s2) const; - - // Creates with a concat operation. - GP concat(const GP &s1,const GP &s2) const; - GP concat(const char *s1,const GP &s2) const; - GP concat(const GP &s1,const char *s2) const; - GP concat(const char *s1,const char *s2) const; - - /* Creates with a strdup and substr. Negative values have strlen(s)+1 - added to them. 
- */ - GP substr( - const char *s,const int start,const int length=(-1)) const; - - GP substr( - const unsigned short *s,const int start,const int length=(-1)) const; - - GP substr( - const unsigned long *s,const int start,const int length=(-1)) const; - - /** Initializes a string with a formatted string (as in #vprintf#). The - string is re-initialized with the characters generated according to the - specified format #fmt# and using the optional arguments. See the ANSI-C - function #vprintf()# for more information. The current implementation - will cause a segmentation violation if the resulting string is longer - than 32768 characters. */ - GP vformat(va_list args) const; - // -- SEARCHING - - static GP UTF8ToNative( const char *s, - const EscapeMode escape=UNKNOWN_ESCAPED ); - static GP NativeToUTF8( const char *s ); - - // Creates an uppercase version of the current string. - GP upcase(void) const; - // Creates a lowercase version of the current string. - GP downcase(void) const; - - /** Returns the next UCS4 character, and updates the pointer s. */ - static unsigned long UTF8toUCS4( - unsigned char const *&s, void const * const endptr ); - - /** Returns the number of bytes in next UCS4 character, - and sets #w# to the next UCS4 chacter. */ - static int UTF8toUCS4( - unsigned long &w, unsigned char const s[], void const * const endptr ) - { unsigned char const *r=s;w=UTF8toUCS4(r,endptr);return (int)((size_t)r-(size_t)s); } - - /** Returns the next UCS4 word from the UTF16 string. 
*/ - static int UTF16toUCS4( - unsigned long &w, unsigned short const * const s,void const * const eptr); - - static int UCS4toUTF16( - unsigned long w, unsigned short &w1, unsigned short &w2); - - int cmp(const char *s2, const int len=(-1)) const; - static int cmp( - const GP &s1, const GP &s2, const int len=(-1)) ; - static int cmp( - const GP &s1, const char *s2, const int len=(-1)); - static int cmp( - const char *s1, const GP &s2, const int len=(-1)); - static int cmp( - const char *s1, const char *s2, const int len=(-1)); - - // Lookup the next character, and return the position of the next character. - int getUCS4(unsigned long &w, const int from) const; - - virtual unsigned char *UCS4toString( - const unsigned long w, unsigned char *ptr, mbstate_t *ps=0) const = 0; - - static unsigned char *UCS4toUTF8( - const unsigned long w,unsigned char *ptr); - - static unsigned char *UCS4toNative( - const unsigned long w,unsigned char *ptr, mbstate_t *ps); - - int search(char c, int from=0) const; - - int search(char const *str, int from=0) const; - - int rsearch(char c, int from=0) const; - - int rsearch(char const *str, int from=0) const; - - int contains(char const accept[], int from=0) const; - - int rcontains(char const accept[], int from=0) const; - -protected: - // Return the next character and increment the source pointer. - virtual unsigned long getValidUCS4(const char *&source) const = 0; - - GP tocase( - bool (*xiswcase)(const unsigned long wc), - unsigned long (*xtowcase)(const unsigned long wc)) const; - - // Tests if the specified character passes the xiswtest. If so, the - // return pointer is incremented to the next character, otherwise the - // specified #ptr# is returned. - const char * isCharType( bool (*xiswtest)(const unsigned long wc), const char *ptr, - const bool reverse=false) const; - - // Find the next character position that passes the isCharType test. 
- int nextCharType( - bool (*xiswtest)(const unsigned long wc),const int from,const int len, - const bool reverse=false) const; - - static bool giswspace(const unsigned long w); - static bool giswupper(const unsigned long w); - static bool giswlower(const unsigned long w); - static unsigned long gtowupper(const unsigned long w); - static unsigned long gtowlower(const unsigned long w); - - virtual void set_remainder( void const * const buf, const unsigned int size, - const EncodeType encodetype); - virtual void set_remainder( void const * const buf, const unsigned int size, - const GP &encoding ); - virtual void set_remainder ( const GP &remainder ); - - virtual GP get_remainder( void ) const; - -public: - /* Returns a copy of this string with characters used in XML with - '<' to "<", '>' to ">", '&' to "&" '\'' to - "'", and '\"' to """. Characters 0x01 through - 0x1f are also escaped. */ - GP toEscaped( const bool tosevenbit ) const; - - // Tests if a string is legally encoded in the current character set. - virtual bool is_valid(void) const = 0; - - virtual int ncopy(wchar_t * const buf, const int buflen) const = 0; - -protected: - -// Actual string data. - int size; - char *data; -}; - -class DJVUAPI GStringRep::UTF8 : public GStringRep -{ -public: - // default constructor - UTF8(void); - // virtual destructor - virtual ~UTF8(); - - // Other virtual methods. - virtual GP blank(const unsigned int sz = 0) const; - virtual GP append(const GP &s2) const; - // Test if Native. - virtual bool isUTF8(void) const; - // Convert to Native. - virtual GP toNative( - const EscapeMode escape=UNKNOWN_ESCAPED) const; - // Convert to UTF8. - virtual GP toUTF8(const bool nothrow=false) const; - // Convert to same as current class. - virtual GP toThis( - const GP &rep,const GP &) const; - // Compare with #s2#. - virtual int cmp(const GP &s2,const int len=(-1)) const; - - static GP create(const unsigned int sz = 0); - - // Convert strings to numbers. 
- virtual int toInt(void) const; - virtual long int toLong( - const int pos, int &endpos, const int base=10) const; - virtual unsigned long toULong( - const int pos, int &endpos, const int base=10) const; - virtual double toDouble( - const int pos, int &endpos) const; - - // Create a strdup string. - static GP create(const char *s); - - // Creates with a concat operation. - static GP create( - const GP &s1,const GP &s2); - static GP create( const GP &s1,const char *s2); - static GP create( const char *s1, const GP &s2); - static GP create( const char *s1,const char *s2); - - // Create with a strdup and substr operation. - static GP create( - const char *s,const int start,const int length=(-1)); - - static GP create( - const unsigned short *s,const int start,const int length=(-1)); - - static GP create( - const unsigned long *s,const int start,const int length=(-1)); - - static GP create_format(const char fmt[],...); - static GP create(const char fmt[],va_list& args); - - virtual unsigned char *UCS4toString( - const unsigned long w,unsigned char *ptr, mbstate_t *ps=0) const; - - // Tests if a string is legally encoded in the current character set. - virtual bool is_valid(void) const; - - virtual int ncopy(wchar_t * const buf, const int buflen) const; - - friend class GBaseString; - -protected: - // Return the next character and increment the source pointer. - virtual unsigned long getValidUCS4(const char *&source) const; -}; - - -/** General purpose character string. - Each dirivied instance of class #GBaseString# represents a - character string. Overloaded operators provide a value semantic - to #GBaseString# objects. Conversion operators and constructors - transparently convert between #GBaseString# objects and - #const char*# pointers. The #GBaseString# class has no public - constructors, since a dirived type should always be used - to specify the desired multibyte character encoding. 
- - Functions taking strings as arguments should declare their - arguments as "#const char*#". Such functions will work equally - well with dirived #GBaseString# objects since there is a fast - conversion operator from the dirivied #GBaseString# objects - to "#const char*#". Functions returning strings should return - #GUTF8String# or #GNativeString# objects because the class will - automatically manage the necessary memory. - - Characters in the string can be identified by their position. The - first character of a string is numbered zero. Negative positions - represent characters relative to the end of the string (i.e. - position #-1# accesses the last character of the string, - position #-2# represents the second last character, etc.) */ - -class DJVUAPI GBaseString : protected GP -{ -public: - enum EscapeMode { - UNKNOWN_ESCAPED=GStringRep::UNKNOWN_ESCAPED, - IS_ESCAPED=GStringRep::IS_ESCAPED, - NOT_ESCAPED=GStringRep::NOT_ESCAPED }; - - friend class GUTF8String; - friend class GNativeString; -protected: - // Sets the gstr pointer; - inline void init(void); - - ~GBaseString(); - inline GBaseString &init(const GP &rep); - - // -- CONSTRUCTORS - /** Null constructor. Constructs an empty string. */ - GBaseString( void ); - -public: - // -- ACCESS - /** Converts a string into a constant null terminated character - array. This conversion operator is very efficient because - it simply returns a pointer to the internal string data. The - returned pointer remains valid as long as the string is - unmodified. */ - operator const char* ( void ) const ; - /// Returns the string length. - unsigned int length( void ) const; - /** Returns true if and only if the string contains zero characters. - This operator is useful for conditional expression in control - structures. - \begin{verbatim} - if (! str) { ... } - while (!! str) { ... } -- Note the double operator! 
- \end{verbatim} - Class #GBaseString# does not to support syntax - "#if# #(str)# #{}#" because the required conversion operator - introduces dangerous ambiguities with certain compilers. */ - bool operator! ( void ) const; - - // -- INDEXING - /** Returns the character at position #n#. An exception - \Ref{GException} is thrown if number #n# is not in range #-len# - to #len-1#, where #len# is the length of the string. The first - character of a string is numbered zero. Negative positions - represent characters relative to the end of the string. */ - char operator[] (int n) const; - /// Returns #TRUE# if the string contains an integer number. - bool is_int(void) const; - /// Returns #TRUE# if the string contains a float number. - bool is_float(void) const; - - /** Converts strings between native & UTF8 **/ - GNativeString getUTF82Native( EscapeMode escape=UNKNOWN_ESCAPED ) const; - GUTF8String getNative2UTF8( void ) const; - - // -- ALTERING - /// Reinitializes a string with the null string. - void empty( void ); - // -- SEARCHING - /** Searches character #c# in the string, starting at position - #from# and scanning forward until reaching the end of the - string. This function returns the position of the matching - character. It returns #-1# if character #c# cannot be found. */ - int search(char c, int from=0) const; - - /** Searches sub-string #str# in the string, starting at position - #from# and scanning forward until reaching the end of the - string. This function returns the position of the first - matching character of the sub-string. It returns #-1# if - string #str# cannot be found. */ - int search(const char *str, int from=0) const; - - /** Searches character #c# in the string, starting at position - #from# and scanning backwards until reaching the beginning of - the string. This function returns the position of the matching - character. It returns #-1# if character #c# cannot be found. 
*/ - int rsearch(char c, const int from=0) const; - /** Searches sub-string #str# in the string, starting at position - #from# and scanning backwards until reaching the beginning of - the string. This function returns the position of the first - matching character of the sub-string. It returns #-1# if - string #str# cannot be found. */ - int rsearch(const char *str, const int from=0) const; - /** Searches for any of the specified characters in the accept - string. It returns #-1# if the none of the characters and - be found, otherwise the position of the first match. */ - int contains(const char accept[], const int from=0) const; - /** Searches for any of the specified characters in the accept - string. It returns #-1# if the none of the characters and be - found, otherwise the position of the last match. */ - int rcontains(const char accept[], const int from=0) const; - - /** Concatenates strings. Returns a string composed by concatenating - the characters of strings #s1# and #s2#. */ - GUTF8String operator+(const GUTF8String &s2) const; - GNativeString operator+(const GNativeString &s2) const; - - /** Returns an integer. Implements i18n atoi. */ - int toInt(void) const; - - /** Returns a long intenger. Implments i18n strtol. */ - long toLong(const int pos, int &endpos, const int base=10) const; - - /** Returns a unsigned long integer. Implements i18n strtoul. */ - unsigned long toULong( - const int pos, int &endpos, const int base=10) const; - - /** Returns a double. Implements the i18n strtod. */ - double toDouble( - const int pos, int &endpos ) const; - - /** Returns a long intenger. Implments i18n strtol. */ - static long toLong( - const GUTF8String& src, const int pos, int &endpos, const int base=10); - - static unsigned long toULong( - const GUTF8String& src, const int pos, int &endpos, const int base=10); - - static double toDouble( - const GUTF8String& src, const int pos, int &endpos); - - /** Returns a long intenger. Implments i18n strtol. 
*/ - static long toLong( - const GNativeString& src, const int pos, int &endpos, const int base=10); - - static unsigned long toULong( - const GNativeString& src, const int pos, int &endpos, const int base=10); - - static double toDouble( - const GNativeString& src, const int pos, int &endpos); - - // -- HASHING - - // -- COMPARISONS - /** Returns an #int#. Compares string with #s2# and returns - sorting order. */ - int cmp(const GBaseString &s2, const int len=(-1)) const; - /** Returns an #int#. Compares string with #s2# and returns - sorting order. */ - int cmp(const char *s2, const int len=(-1)) const; - /** Returns an #int#. Compares string with #s2# and returns - sorting order. */ - int cmp(const char s2) const; - /** Returns an #int#. Compares #s2# with #s2# and returns - sorting order. */ - static int cmp(const char *s1, const char *s2, const int len=(-1)); - /** Returns a boolean. The Standard C strncmp takes two string and - compares the first N characters. static bool GBaseString::ncmp - will compare #s1# with #s2# with the #len# characters starting - from the beginning of the string. */ - /** String comparison. Returns true if and only if character - strings #s1# and #s2# are equal (as with #strcmp#.) - */ - bool operator==(const GBaseString &s2) const; - bool operator==(const char *s2) const; - friend bool operator==(const char *s1, const GBaseString &s2); - - /** String comparison. Returns true if and only if character - strings #s1# and #s2# are not equal (as with #strcmp#.) - */ - bool operator!=(const GBaseString &s2) const; - bool operator!=(const char *s2) const; - friend bool operator!=(const char *s1, const GBaseString &s2); - - /** String comparison. Returns true if and only if character - strings #s1# is lexicographically greater than or equal to - string #s2# (as with #strcmp#.) 
*/ - bool operator>=(const GBaseString &s2) const; - bool operator>=(const char *s2) const; - bool operator>=(const char s2) const; - friend bool operator>=(const char *s1, const GBaseString &s2); - friend bool operator>=(const char s1, const GBaseString &s2); - - /** String comparison. Returns true if and only if character - strings #s1# is lexicographically less than string #s2# - (as with #strcmp#.) - */ - bool operator<(const GBaseString &s2) const; - bool operator<(const char *s2) const; - bool operator<(const char s2) const; - friend bool operator<(const char *s1, const GBaseString &s2); - friend bool operator<(const char s1, const GBaseString &s2); - - /** String comparison. Returns true if and only if character - strings #s1# is lexicographically greater than string #s2# - (as with #strcmp#.) - */ - bool operator> (const GBaseString &s2) const; - bool operator> (const char *s2) const; - bool operator> (const char s2) const; - friend bool operator> (const char *s1, const GBaseString &s2); - friend bool operator> (const char s1, const GBaseString &s2); - - /** String comparison. Returns true if and only if character - strings #s1# is lexicographically less than or equal to string - #s2# (as with #strcmp#.) - */ - bool operator<=(const GBaseString &s2) const; - bool operator<=(const char *s2) const; - bool operator<=(const char s2) const; - friend bool operator<=(const char *s1, const GBaseString &s2); - friend bool operator<=(const char s1, const GBaseString &s2); - - /** Returns an integer. Implements a functional i18n atoi. Note - that if you pass a GBaseString that is not in Native format - the results may be disparaging. */ - - /** Returns a hash code for the string. This hashing function - helps when creating associative maps with string keys (see - \Ref{GMap}). This hash code may be reduced to an arbitrary - range by computing its remainder modulo the upper bound of - the range. 
*/ - friend DJVUAPI unsigned int hash(const GBaseString &ref); - // -- HELPERS - friend class GStringRep; - - /// Returns next non space position. - int nextNonSpace( const int from=0, const int len=(-1) ) const; - - /// Returns next character position. - int nextChar( const int from=0 ) const; - - /// Returns next non space position. - int nextSpace( const int from=0, const int len=(-1) ) const; - - /// return the position after the last non-whitespace character. - int firstEndSpace( const int from=0,const int len=(-1) ) const; - - /// Tests if the string is legally encoded in the current codepage. - bool is_valid(void) const; - - /// copy to a wchar_t buffer - int ncopy(wchar_t * const buf, const int buflen) const; - -protected: - const char *gstr; - static void throw_illegal_subscript() no_return; - static const char *nullstr; -public: - GNativeString UTF8ToNative( - const bool currentlocale=false, - const EscapeMode escape=UNKNOWN_ESCAPED) const; - GUTF8String NativeToUTF8(void) const; -protected: - inline int CheckSubscript(int n) const; -}; - -/** General purpose character string. - Each instance of class #GUTF8String# represents a character - string. Overloaded operators provide a value semantic to - #GUTF8String# objects. Conversion operators and constructors - transparently convert between #GUTF8String# objects and - #const char*# pointers. - - Functions taking strings as arguments should declare their - arguments as "#const char*#". Such functions will work equally - well with #GUTF8String# objects since there is a fast conversion - operator from #GUTF8String# to "#const char*#". Functions - returning strings should return #GUTF8String# or #GNativeString# - objects because the class will automatically manage the necessary - memory. - - Characters in the string can be identified by their position. The - first character of a string is numbered zero. Negative positions - represent characters relative to the end of the string (i.e. 
- position #-1# accesses the last character of the string, - position #-2# represents the second last character, etc.) */ - -class DJVUAPI GUTF8String : public GBaseString -{ -public: - ~GUTF8String(); - inline void init(void); - - inline GUTF8String &init(const GP &rep); - - // -- CONSTRUCTORS - /** Null constructor. Constructs an empty string. */ - GUTF8String(void); - /// Constructs a string from a character. - GUTF8String(const char dat); - /// Constructs a string from a null terminated character array. - GUTF8String(const char *str); - /// Constructs a string from a null terminated character array. - GUTF8String(const unsigned char *str); - GUTF8String(const unsigned short *dat); - GUTF8String(const unsigned long *dat); - /** Constructs a string from a character array. Elements of the - character array #dat# are added into the string until the - string length reaches #len# or until encountering a null - character (whichever comes first). */ - GUTF8String(const char *dat, unsigned int len); - GUTF8String(const unsigned short *dat, unsigned int len); - GUTF8String(const unsigned long *dat, unsigned int len); - - /// Construct from base class. - GUTF8String(const GP &str); - GUTF8String(const GBaseString &str); - GUTF8String(const GUTF8String &str); - GUTF8String(const GNativeString &str); - /** Constructs a string from a character array. Elements of the - character array #dat# are added into the string until the - string length reaches #len# or until encountering a null - character (whichever comes first). */ - GUTF8String(const GBaseString &gs, int from, int len); - - /** Copy a null terminated character array. Resets this string - with the character string contained in the null terminated - character array #str#. 
*/ - GUTF8String& operator= (const char str); - GUTF8String& operator= (const char *str); - inline GUTF8String& operator= (const GP &str); - inline GUTF8String& operator= (const GBaseString &str); - inline GUTF8String& operator= (const GUTF8String &str); - inline GUTF8String& operator= (const GNativeString &str); - - /** Constructs a string with a formatted string (as in #vprintf#). - The string is re-initialized with the characters generated - according to the specified format #fmt# and using the optional - arguments. See the ANSI-C function #vprintf()# for more - information. The current implementation will cause a - segmentation violation if the resulting string is longer - than 32768 characters. */ - GUTF8String(const GUTF8String &fmt, va_list &args); - - /// Constructs a string from a character. - /** Constructs a string with a human-readable representation of - integer #number#. The format is similar to format #"%d"# in - function #printf#. */ - GUTF8String(const int number); - - /** Constructs a string with a human-readable representation of - floating point number #number#. The format is similar to - format #"%f"# in function #printf#. */ - GUTF8String(const double number); - - - /** Initializes a string with a formatted string (as in #printf#). - The string is re-initialized with the characters generated - according to the specified format #fmt# and using the optional - arguments. See the ANSI-C function #printf()# for more - information. The current implementation will cause a - segmentation violation if the resulting string is longer - than 32768 characters. */ - GUTF8String &format(const char *fmt, ... ); - /** Initializes a string with a formatted string (as in #vprintf#). - The string is re-initialized with the characters generated - according to the specified format #fmt# and using the optional - arguments. See the ANSI-C function #vprintf()# for more - information. 
The current implementation will cause a - segmentation violation if the resulting string is longer - than 32768 characters. */ - GUTF8String &vformat(const GUTF8String &fmt, va_list &args); - - /** Returns a copy of this string with characters used in XML with - '<' to "<", '>' to ">", '&' to "&" '\'' to - "'", and '\"' to """. Characters 0x01 through - 0x1f are also escaped. */ - GUTF8String toEscaped( const bool tosevenbit=false ) const; - - /** Converts strings containing HTML/XML escaped characters into - their unescaped forms. Numeric representations of characters - (e.g., "&" or "&" for "*") are the only forms - converted by this function. */ - GUTF8String fromEscaped( void ) const; - - /** Converts strings containing HTML/XML escaped characters - (e.g., "<" for "<") into their unescaped forms. The - conversion is partially defined by the ConvMap argument which - specifies the conversion strings to be recognized. Numeric - representations of characters (e.g., "&" or "&" - for "*") are always converted. */ - GUTF8String fromEscaped( - const GMap ConvMap ) const; - - - // -- CONCATENATION - /// Appends character #ch# to the string. - GUTF8String& operator+= (char ch); - - /// Appends the null terminated character array #str# to the string. - GUTF8String& operator+= (const char *str); - /// Appends the specified GBaseString to the string. - GUTF8String& operator+= (const GBaseString &str); - - /** Returns a sub-string. The sub-string is composed by copying - #len# characters starting at position #from# in this string. - The length of the resulting string may be smaller than #len# - if the specified range is too large. */ - GUTF8String substr(int from, int len/*=(-1)*/) const; - - /** Returns an upper case copy of this string. The returned string - contains a copy of the current string with all letters turned - into upper case letters. */ - GUTF8String upcase( void ) const; - /** Returns an lower case copy of this string. 
The returned string - contains a copy of the current string with all letters turned - into lower case letters. */ - GUTF8String downcase( void ) const; - - /** Concatenates strings. Returns a string composed by concatenating - the characters of strings #s1# and #s2#. - */ - GUTF8String operator+(const GBaseString &s2) const; - GUTF8String operator+(const GUTF8String &s2) const; - GUTF8String operator+(const GNativeString &s2) const; - GUTF8String operator+(const char *s2) const; - friend DJVUAPI GUTF8String operator+(const char *s1, const GUTF8String &s2); - - /** Provides a direct access to the string buffer. Returns a - pointer for directly accessing the string buffer. This pointer - valid remains valid as long as the string is not modified by - other means. Positive values for argument #n# represent the - length of the returned buffer. The returned string buffer will - be large enough to hold at least #n# characters plus a null - character. If #n# is positive but smaller than the string - length, the string will be truncated to #n# characters. */ - char *getbuf(int n = -1); - /** Set the character at position #n# to value #ch#. An exception - \Ref{GException} is thrown if number #n# is not in range #-len# - to #len#, where #len# is the length of the string. If character - #ch# is zero, the string is truncated at position #n#. The - first character of a string is numbered zero. Negative - positions represent characters relative to the end of the - string. If position #n# is equal to the length of the string, - this function appends character #ch# to the end of the string. 
*/ - void setat(const int n, const char ch); -public: - typedef enum GStringRep::EncodeType EncodeType; - static GUTF8String create(void const * const buf, - const unsigned int size, - const EncodeType encodetype, const GUTF8String &encoding); - static GUTF8String create( void const * const buf, - unsigned int size, const EncodeType encodetype ); - static GUTF8String create( void const * const buf, - const unsigned int size, const GUTF8String &encoding ); - static GUTF8String create( void const * const buf, - const unsigned int size, const GP &remainder); - GP get_remainder(void) const; - static GUTF8String create( const char *buf, const unsigned int bufsize ); - static GUTF8String create( const unsigned short *buf, const unsigned int bufsize ); - static GUTF8String create( const unsigned long *buf, const unsigned int bufsize ); -}; - - -#if !HAS_WCHAR -#define GBaseString GUTF8String -#endif - -/** General purpose character string. - Each instance of class #GNativeString# represents a character - string. Overloaded operators provide a value semantic to - #GNativeString# objects. Conversion operators and constructors - transparently convert between #GNativeString# objects and - #const char*# pointers. - - Functions taking strings as arguments should declare their - arguments as "#const char*#". Such functions will work equally - well with #GNativeString# objects since there is a fast conversion - operator from #GNativeString# to "#const char*#". Functions - returning strings should return #GUTF8String# or #GNativeString# - objects because the class will automatically manage the necessary - memory. - - Characters in the string can be identified by their position. The - first character of a string is numbered zero. Negative positions - represent characters relative to the end of the string (i.e. - position #-1# accesses the last character of the string, - position #-2# represents the second last character, etc.) 
*/ - -class DJVUAPI GNativeString : public GBaseString -{ -public: - ~GNativeString(); - // -- CONSTRUCTORS - /** Null constructor. Constructs an empty string. */ - GNativeString(void); - /// Constructs a string from a character. - GNativeString(const char dat); - /// Constructs a string from a null terminated character array. - GNativeString(const char *str); - /// Constructs a string from a null terminated character array. - GNativeString(const unsigned char *str); - GNativeString(const unsigned short *str); - GNativeString(const unsigned long *str); - /** Constructs a string from a character array. Elements of the - character array #dat# are added into the string until the - string length reaches #len# or until encountering a null - character (whichever comes first). */ - GNativeString(const char *dat, unsigned int len); - GNativeString(const unsigned short *dat, unsigned int len); - GNativeString(const unsigned long *dat, unsigned int len); - /// Construct from base class. - GNativeString(const GP &str); - GNativeString(const GBaseString &str); -#if HAS_WCHAR - GNativeString(const GUTF8String &str); -#endif - GNativeString(const GNativeString &str); - /** Constructs a string from a character array. Elements of the - character array #dat# are added into the string until the - string length reaches #len# or until encountering a null - character (whichever comes first). */ - GNativeString(const GBaseString &gs, int from, int len); - - /** Constructs a string with a formatted string (as in #vprintf#). - The string is re-initialized with the characters generated - according to the specified format #fmt# and using the optional - arguments. See the ANSI-C function #vprintf()# for more - information. The current implementation will cause a - segmentation violation if the resulting string is longer than - 32768 characters. */ - GNativeString(const GNativeString &fmt, va_list &args); - - /** Constructs a string with a human-readable representation of - integer #number#. 
The format is similar to format #"%d"# in - function #printf#. */ - GNativeString(const int number); - - /** Constructs a string with a human-readable representation of - floating point number #number#. The format is similar to - format #"%f"# in function #printf#. */ - GNativeString(const double number); - -#if !HAS_WCHAR -#undef GBaseString -#else - /// Initialize this string class - void init(void); - - /// Initialize this string class - GNativeString &init(const GP &rep); - - /** Copy a null terminated character array. Resets this string with - the character string contained in the null terminated character - array #str#. */ - GNativeString& operator= (const char str); - GNativeString& operator= (const char *str); - inline GNativeString& operator= (const GP &str); - inline GNativeString& operator= (const GBaseString &str); - inline GNativeString& operator= (const GUTF8String &str); - inline GNativeString& operator= (const GNativeString &str); - // -- CONCATENATION - /// Appends character #ch# to the string. - GNativeString& operator+= (char ch); - /// Appends the null terminated character array #str# to the string. - GNativeString& operator+= (const char *str); - /// Appends the specified GBaseString to the string. - GNativeString& operator+= (const GBaseString &str); - - /** Returns a sub-string. The sub-string is composed by copying - #len# characters starting at position #from# in this string. - The length of the resulting string may be smaller than #len# - if the specified range is too large. */ - GNativeString substr(int from, int len/*=(-1)*/) const; - - /** Returns an upper case copy of this string. The returned - string contains a copy of the current string with all letters - turned into upper case letters. */ - GNativeString upcase( void ) const; - /** Returns an lower case copy of this string. The returned - string contains a copy of the current string with all letters - turned into lower case letters. 
*/ - GNativeString downcase( void ) const; - - - GNativeString operator+(const GBaseString &s2) const; - GNativeString operator+(const GNativeString &s2) const; - GUTF8String operator+(const GUTF8String &s2) const; - GNativeString operator+(const char *s2) const; - friend DJVUAPI GNativeString operator+(const char *s1, const GNativeString &s2); - - /** Initializes a string with a formatted string (as in #printf#). - The string is re-initialized with the characters generated - according to the specified format #fmt# and using the optional - arguments. See the ANSI-C function #printf()# for more - information. The current implementation will cause a - segmentation violation if the resulting string is longer than - 32768 characters. */ - GNativeString &format(const char *fmt, ... ); - /** Initializes a string with a formatted string (as in #vprintf#). - The string is re-initialized with the characters generated - according to the specified format #fmt# and using the optional - arguments. See the ANSI-C function #vprintf()# for more - information. The current implementation will cause a - segmentation violation if the resulting string is longer than - 32768 characters. */ - GNativeString &vformat(const GNativeString &fmt, va_list &args); - - /** Returns a copy of this string with characters used in XML with - '<' to "<", '>' to ">", '&' to "&" '\'' to - "'", and '\"' to """. Characters 0x01 through - 0x1f are also escaped. */ - GNativeString toEscaped( const bool tosevenbit=false ) const; - - - /** Provides a direct access to the string buffer. Returns a - pointer for directly accessing the string buffer. This - pointer valid remains valid as long as the string is not - modified by other means. Positive values for argument #n# - represent the length of the returned buffer. The returned - string buffer will be large enough to hold at least #n# - characters plus a null character. 
If #n# is positive but - smaller than the string length, the string will be truncated - to #n# characters. */ - char *getbuf(int n = -1); - /** Set the character at position #n# to value #ch#. An exception - \Ref{GException} is thrown if number #n# is not in range #-len# - to #len#, where #len# is the length of the string. If - character #ch# is zero, the string is truncated at position - #n#. The first character of a string is numbered zero. - Negative positions represent characters relative to the end of - the string. If position #n# is equal to the length of the - string, this function appends character #ch# to the end of the - string. */ - void setat(const int n, const char ch); - - static GNativeString create( const char *buf, const unsigned int bufsize ); - static GNativeString create( const unsigned short *buf, const unsigned int bufsize ); - static GNativeString create( const unsigned long *buf, const unsigned int bufsize ); -#endif // WinCE -}; - -//@} - -inline -GBaseString::operator const char* ( void ) const -{ - return ptr?(*this)->data:nullstr; -} - -inline unsigned int -GBaseString::length( void ) const -{ - return ptr ? (*this)->size : 0; -} - -inline bool -GBaseString::operator! 
( void ) const -{ - return !ptr; -} - -inline GUTF8String -GUTF8String::upcase( void ) const -{ - if (ptr) return (*this)->upcase(); - return *this; -} - -inline GUTF8String -GUTF8String::downcase( void ) const -{ - if (ptr) return (*this)->downcase(); - return *this; -} - -inline void -GUTF8String::init(void) -{ GBaseString::init(); } - -inline GUTF8String & -GUTF8String::init(const GP &rep) -{ GP::operator=(rep?rep->toUTF8(true):rep); init(); return *this; } - -inline GUTF8String & -GUTF8String::vformat(const GUTF8String &fmt, va_list &args) -{ return (*this = (fmt.ptr?GUTF8String(fmt,args):fmt)); } - -inline GUTF8String -GUTF8String::toEscaped( const bool tosevenbit ) const -{ return ptr?GUTF8String((*this)->toEscaped(tosevenbit)):(*this); } - -inline GP -GUTF8String::get_remainder(void) const -{ - GP retval; - if(ptr) - retval=((*this)->get_remainder()); - return retval; -} - -inline -GUTF8String::GUTF8String(const GNativeString &str) -{ init(str.length()?(str->toUTF8(true)):(GP)str); } - -inline -GUTF8String::GUTF8String(const GP &str) -{ init(str?(str->toUTF8(true)):str); } - -inline -GUTF8String::GUTF8String(const GBaseString &str) -{ init(str.length()?(str->toUTF8(true)):(GP)str); } - -inline void -GBaseString::init(void) -{ - gstr=ptr?((*this)->data):nullstr; -} -/** Returns an integer. Implements i18n atoi. */ -inline int -GBaseString::toInt(void) const -{ return ptr?(*this)->toInt():0; } - -/** Returns a long intenger. Implments i18n strtol. 
*/ -inline long -GBaseString::toLong(const int pos, int &endpos, const int base) const -{ - long int retval=0; - if(ptr) - { - retval=(*this)->toLong(pos, endpos, base); - }else - { - endpos=(-1); - } - return retval; -} - -inline long -GBaseString::toLong( - const GUTF8String& src, const int pos, int &endpos, const int base) -{ - return src.toLong(pos,endpos,base); -} - -inline long -GBaseString::toLong( - const GNativeString& src, const int pos, int &endpos, const int base) -{ - return src.toLong(pos,endpos,base); -} - -/** Returns a unsigned long integer. Implements i18n strtoul. */ -inline unsigned long -GBaseString::toULong(const int pos, int &endpos, const int base) const -{ - unsigned long retval=0; - if(ptr) - { - retval=(*this)->toULong(pos, endpos, base); - }else - { - endpos=(-1); - } - return retval; -} - -inline unsigned long -GBaseString::toULong( - const GUTF8String& src, const int pos, int &endpos, const int base) -{ - return src.toULong(pos,endpos,base); -} - -inline unsigned long -GBaseString::toULong( - const GNativeString& src, const int pos, int &endpos, const int base) -{ - return src.toULong(pos,endpos,base); -} - -/** Returns a double. Implements the i18n strtod. 
*/ -inline double -GBaseString::toDouble( - const int pos, int &endpos ) const -{ - double retval=(double)0; - if(ptr) - { - retval=(*this)->toDouble(pos, endpos); - }else - { - endpos=(-1); - } - return retval; -} - -inline double -GBaseString::toDouble( - const GUTF8String& src, const int pos, int &endpos) -{ - return src.toDouble(pos,endpos); -} - -inline double -GBaseString::toDouble( - const GNativeString& src, const int pos, int &endpos) -{ - return src.toDouble(pos,endpos); -} - -inline GBaseString & -GBaseString::init(const GP &rep) -{ GP::operator=(rep); init(); return *this;} - -inline char -GBaseString::operator[] (int n) const -{ return ((n||ptr)?((*this)->data[CheckSubscript(n)]):0); } - -inline int -GBaseString::search(char c, int from) const -{ return ptr?((*this)->search(c,from)):(-1); } - -inline int -GBaseString::search(const char *str, int from) const -{ return ptr?((*this)->search(str,from)):(-1); } - -inline int -GBaseString::rsearch(char c, const int from) const -{ return ptr?((*this)->rsearch(c,from)):(-1); } - -inline int -GBaseString::rsearch(const char *str, const int from) const -{ return ptr?((*this)->rsearch(str,from)):(-1); } - -inline int -GBaseString::contains(const char accept[], const int from) const -{ return ptr?((*this)->contains(accept,from)):(-1); } - -inline int -GBaseString::rcontains(const char accept[], const int from) const -{ return ptr?((*this)->rcontains(accept,from)):(-1); } - -inline int -GBaseString::cmp(const GBaseString &s2, const int len) const -{ return GStringRep::cmp(*this,s2,len); } - -inline int -GBaseString::cmp(const char *s2, const int len) const -{ return GStringRep::cmp(*this,s2,len); } - -inline int -GBaseString::cmp(const char s2) const -{ return GStringRep::cmp(*this,&s2,1); } - -inline int -GBaseString::cmp(const char *s1, const char *s2, const int len) -{ return GStringRep::cmp(s1,s2,len); } - -inline bool -GBaseString::operator==(const GBaseString &s2) const -{ return !cmp(s2); } - -inline bool 
-GBaseString::operator==(const char *s2) const -{ return !cmp(s2); } - -inline bool -GBaseString::operator!=(const GBaseString &s2) const -{ return !!cmp(s2); } - -inline bool -GBaseString::operator!=(const char *s2) const -{ return !!cmp(s2); } - -inline bool -GBaseString::operator>=(const GBaseString &s2) const -{ return (cmp(s2)>=0); } - -inline bool -GBaseString::operator>=(const char *s2) const -{ return (cmp(s2)>=0); } - -inline bool -GBaseString::operator>=(const char s2) const -{ return (cmp(s2)>=0); } - -inline bool -GBaseString::operator<(const GBaseString &s2) const -{ return (cmp(s2)<0); } - -inline bool -GBaseString::operator<(const char *s2) const -{ return (cmp(s2)<0); } - -inline bool -GBaseString::operator<(const char s2) const -{ return (cmp(s2)<0); } - -inline bool -GBaseString::operator> (const GBaseString &s2) const -{ return (cmp(s2)>0); } - -inline bool -GBaseString::operator> (const char *s2) const -{ return (cmp(s2)>0); } - -inline bool -GBaseString::operator> (const char s2) const -{ return (cmp(s2)>0); } - -inline bool -GBaseString::operator<=(const GBaseString &s2) const -{ return (cmp(s2)<=0); } - -inline bool -GBaseString::operator<=(const char *s2) const -{ return (cmp(s2)<=0); } - -inline bool -GBaseString::operator<=(const char s2) const -{ return (cmp(s2)<=0); } - -inline int -GBaseString::nextNonSpace( const int from, const int len ) const -{ return ptr?(*this)->nextNonSpace(from,len):0; } - -inline int -GBaseString::nextChar( const int from ) const -{ return ptr?(*this)->nextChar(from):0; } - -inline int -GBaseString::nextSpace( const int from, const int len ) const -{ return ptr?(*this)->nextSpace(from,len):0; } - -inline int -GBaseString::firstEndSpace( const int from,const int len ) const -{ return ptr?(*this)->firstEndSpace(from,len):0; } - -inline bool -GBaseString::is_valid(void) const -{ return ptr?((*this)->is_valid()):true; } - -inline int -GBaseString::ncopy(wchar_t * const buf, const int buflen) const 
-{if(buf&&buflen)buf[0]=0;return ptr?((*this)->ncopy(buf,buflen)):0;} - -inline int -GBaseString::CheckSubscript(int n) const -{ - if(n) - { - if (n<0 && ptr) - n += (*this)->size; - if (n<0 || !ptr || n > (int)(*this)->size) - throw_illegal_subscript(); - } - return n; -} - -inline GBaseString::GBaseString(void) { init(); } - -inline GUTF8String::GUTF8String(void) { } - -inline GUTF8String::GUTF8String(const GUTF8String &str) -{ init(str); } - -inline GUTF8String& GUTF8String::operator= (const GP &str) -{ return init(str); } - -inline GUTF8String& GUTF8String::operator= (const GBaseString &str) -{ return init(str); } - -inline GUTF8String& GUTF8String::operator= (const GUTF8String &str) -{ return init(str); } - -inline GUTF8String& GUTF8String::operator= (const GNativeString &str) -{ return init(str); } - -inline GUTF8String -GUTF8String::create( const char *buf, const unsigned int bufsize ) -{ -#if HAS_WCHAR - return GNativeString(buf,bufsize); -#else - return GUTF8String(buf,bufsize); -#endif -} - -inline GUTF8String -GUTF8String::create( const unsigned short *buf, const unsigned int bufsize ) -{ - return GUTF8String(buf,bufsize); -} - -inline GUTF8String -GUTF8String::create( const unsigned long *buf, const unsigned int bufsize ) -{ - return GUTF8String(buf,bufsize); -} - -inline GNativeString::GNativeString(void) {} - -#if !HAS_WCHAR -// For Windows CE, GNativeString is essentially GUTF8String - -inline -GNativeString::GNativeString(const GUTF8String &str) -: GUTF8String(str) {} - -inline -GNativeString::GNativeString(const GP &str) -: GUTF8String(str) {} - -inline -GNativeString::GNativeString(const char dat) -: GUTF8String(dat) {} - -inline -GNativeString::GNativeString(const char *str) -: GUTF8String(str) {} - -inline -GNativeString::GNativeString(const unsigned char *str) -: GUTF8String(str) {} - -inline -GNativeString::GNativeString(const unsigned short *str) -: GUTF8String(str) {} - -inline -GNativeString::GNativeString(const unsigned long *str) -: 
GUTF8String(str) {} - -inline -GNativeString::GNativeString(const char *dat, unsigned int len) -: GUTF8String(dat,len) {} - -inline -GNativeString::GNativeString(const unsigned short *dat, unsigned int len) -: GUTF8String(dat,len) {} - -inline -GNativeString::GNativeString(const unsigned long *dat, unsigned int len) -: GUTF8String(dat,len) {} - -inline -GNativeString::GNativeString(const GNativeString &str) -: GUTF8String(str) {} - -inline -GNativeString::GNativeString(const int number) -: GUTF8String(number) {} - -inline -GNativeString::GNativeString(const double number) -: GUTF8String(number) {} - -inline -GNativeString::GNativeString(const GNativeString &fmt, va_list &args) -: GUTF8String(fmt,args) {} - -#else // HAS_WCHAR - -/// Initialize this string class -inline void -GNativeString::init(void) -{ GBaseString::init(); } - -/// Initialize this string class -inline GNativeString & -GNativeString::init(const GP &rep) -{ - GP::operator=(rep?rep->toNative(GStringRep::NOT_ESCAPED):rep); - init(); - return *this; -} - -inline GNativeString -GNativeString::substr(int from, int len) const -{ return GNativeString(*this, from, len); } - -inline GNativeString & -GNativeString::vformat(const GNativeString &fmt, va_list &args) -{ return (*this = (fmt.ptr?GNativeString(fmt,args):fmt)); } - -inline GNativeString -GNativeString::toEscaped( const bool tosevenbit ) const -{ return ptr?GNativeString((*this)->toEscaped(tosevenbit)):(*this); } - -inline -GNativeString::GNativeString(const GUTF8String &str) -{ - if (str.length()) - init(str->toNative(GStringRep::NOT_ESCAPED)); - else - init((GP)str); -} - -inline -GNativeString::GNativeString(const GP &str) -{ - if (str) - init(str->toNative(GStringRep::NOT_ESCAPED)); - else - init(str); -} - -inline -GNativeString::GNativeString(const GBaseString &str) -{ - if (str.length()) - init(str->toNative(GStringRep::NOT_ESCAPED)); - else - init((GP)str); -} - - -inline -GNativeString::GNativeString(const GNativeString &fmt, va_list &args) 
-{ - if (fmt.ptr) - init(fmt->vformat(args)); - else - init(fmt); -} - -inline GNativeString -GNativeString::create( const char *buf, const unsigned int bufsize ) -{ - return GNativeString(buf,bufsize); -} - -inline GNativeString -GNativeString::create( const unsigned short *buf, const unsigned int bufsize ) -{ - return GNativeString(buf,bufsize); -} - -inline GNativeString -GNativeString::create( const unsigned long *buf, const unsigned int bufsize ) -{ - return GNativeString(buf,bufsize); -} - -inline GNativeString& -GNativeString::operator= (const GP &str) -{ return init(str); } - -inline GNativeString& -GNativeString::operator= (const GBaseString &str) -{ return init(str); } - -inline GNativeString& -GNativeString::operator= (const GUTF8String &str) -{ return init(str); } - -inline GNativeString& -GNativeString::operator= (const GNativeString &str) -{ return init(str); } - -inline GNativeString -GNativeString::upcase( void ) const -{ - if (ptr) return (*this)->upcase(); - return *this; -} - -inline GNativeString -GNativeString::downcase( void ) const -{ - if (ptr) return (*this)->downcase(); - return *this; -} - -#endif // HAS_WCHAR - -inline bool -operator==(const char *s1, const GBaseString &s2) -{ return !s2.cmp(s1); } - -inline bool -operator!=(const char *s1, const GBaseString &s2) -{ return !!s2.cmp(s1); } - -inline bool -operator>=(const char *s1, const GBaseString &s2) -{ return (s2.cmp(s1)<=0); } - -inline bool -operator>=(const char s1, const GBaseString &s2) -{ return (s2.cmp(s1)<=0); } - -inline bool -operator<(const char *s1, const GBaseString &s2) -{ return (s2.cmp(s1)>0); } - -inline bool -operator<(const char s1, const GBaseString &s2) -{ return (s2.cmp(s1)>0); } - -inline bool -operator> (const char *s1, const GBaseString &s2) -{ return (s2.cmp(s1)<0); } - -inline bool -operator> (const char s1, const GBaseString &s2) -{ return (s2.cmp(s1)<0); } - -inline bool -operator<=(const char *s1, const GBaseString &s2) -{ return !(s1>s2); } - -inline 
bool -operator<=(const char s1, const GBaseString &s2) -{ return !(s1>s2); } - -// ------------------- The end - - -#ifdef HAVE_NAMESPACES -} -# ifndef NOT_USING_DJVU_NAMESPACE -using namespace DJVU; -# endif -#endif -#endif - +//C- -*- C++ -*- +//C- ------------------------------------------------------------------- +//C- DjVuLibre-3.5 +//C- Copyright (c) 2002 Leon Bottou and Yann Le Cun. +//C- Copyright (c) 2001 AT&T +//C- +//C- This software is subject to, and may be distributed under, the +//C- GNU General Public License, either Version 2 of the license, +//C- or (at your option) any later version. The license should have +//C- accompanied the software or you may obtain a copy of the license +//C- from the Free Software Foundation at http://www.fsf.org . +//C- +//C- This program is distributed in the hope that it will be useful, +//C- but WITHOUT ANY WARRANTY; without even the implied warranty of +//C- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +//C- GNU General Public License for more details. +//C- +//C- DjVuLibre-3.5 is derived from the DjVu(r) Reference Library from +//C- Lizardtech Software. Lizardtech Software has authorized us to +//C- replace the original DjVu(r) Reference Library notice by the following +//C- text (see doc/lizard2002.djvu and doc/lizardtech2007.djvu): +//C- +//C- ------------------------------------------------------------------ +//C- | DjVu (r) Reference Library (v. 3.5) +//C- | Copyright (c) 1999-2001 LizardTech, Inc. All Rights Reserved. +//C- | The DjVu Reference Library is protected by U.S. Pat. No. +//C- | 6,058,214 and patents pending. +//C- | +//C- | This software is subject to, and may be distributed under, the +//C- | GNU General Public License, either Version 2 of the license, +//C- | or (at your option) any later version. The license should have +//C- | accompanied the software or you may obtain a copy of the license +//C- | from the Free Software Foundation at http://www.fsf.org . 
+//C- | +//C- | The computer code originally released by LizardTech under this +//C- | license and unmodified by other parties is deemed "the LIZARDTECH +//C- | ORIGINAL CODE." Subject to any third party intellectual property +//C- | claims, LizardTech grants recipient a worldwide, royalty-free, +//C- | non-exclusive license to make, use, sell, or otherwise dispose of +//C- | the LIZARDTECH ORIGINAL CODE or of programs derived from the +//C- | LIZARDTECH ORIGINAL CODE in compliance with the terms of the GNU +//C- | General Public License. This grant only confers the right to +//C- | infringe patent claims underlying the LIZARDTECH ORIGINAL CODE to +//C- | the extent such infringement is reasonably necessary to enable +//C- | recipient to make, have made, practice, sell, or otherwise dispose +//C- | of the LIZARDTECH ORIGINAL CODE (or portions thereof) and not to +//C- | any greater extent that may be necessary to utilize further +//C- | modifications or combinations. +//C- | +//C- | The LIZARDTECH ORIGINAL CODE is provided "AS IS" WITHOUT WARRANTY +//C- | OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +//C- | TO ANY WARRANTY OF NON-INFRINGEMENT, OR ANY IMPLIED WARRANTY OF +//C- | MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. +//C- +------------------------------------------------------------------ +// +// $Id: GString.h,v 1.24 2008/01/27 11:36:27 leonb Exp $ +// $Name: $ + +#ifndef _GSTRING_H_ +#define _GSTRING_H_ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif +#if NEED_GNUG_PRAGMAS +# pragma interface +#endif + +/** @name GString.h + + Files #"GString.h"# and #"GString.cpp"# implement a general + purpose string class \Ref{GBaseString}, with dirived types + \Ref{GUTF8String} and \Ref{GNativeString} for UTF8 MBS encoding + and the current Native MBS encoding respectively. This + implementation relies on smart pointers (see + \Ref{GSmartPointer.h}). 
+ + {\bf Historical Comments} --- At some point during the DjVu + research era, it became clear that C++ compilers rarely provided + portable libraries. We then decided to avoid fancy classes (like + #iostream# or #string#) and to rely only on the good old C + library. A good string class however is very useful. We had + already randomly picked letter 'G' to prefix class names and we + logically derived the new class name. Native English speakers + kept laughing in hiding. This is ironic because we completely + forgot this letter 'G' when creating more challenging things + like the ZP Coder or the IW44 wavelets. + + {\bf Later Changes} + When converting to I18N, we (Lizardtech) decided that two string classes + where needing, replacing the original GString with \Ref{GUTF8String} and + \Ref{GNativeString}. + + @memo + General purpose string class. + @author + L\'eon Bottou -- initial implementation.\\ + +// From: Leon Bottou, 1/31/2002 +// This file has very little to do with my initial implementation. +// It has been practically rewritten by Lizardtech for i18n changes. +// My original implementation was very small in comparison +// . +// In my opinion, the duplication of the string classes is a failed +// attempt to use the type system to enforce coding policies. +// This could be fixed. But there are better things to do in djvulibre. + + @version + #$Id: GString.h,v 1.24 2008/01/27 11:36:27 leonb Exp $# */ +//@{ + + +#include "DjVuGlobal.h" +#include "GContainer.h" + +#include +#include +#ifdef WIN32 +# define HAS_WCHAR 1 +# define HAS_MBSTATE 1 +#endif + +#if HAS_WCHAR +# if !defined(AUTOCONF) || HAVE_WCHAR_H +# include +# endif +#endif + + +#ifdef HAVE_NAMESPACES +namespace DJVU { +# ifdef NOT_DEFINED // Just to fool emacs c++ mode +} +#endif +#endif + +#if !HAS_MBSTATE +# ifndef HAVE_MBSTATE_T +typedef int mbstate_t; +# endif +#endif + +class GBaseString; +class GUTF8String; +class GNativeString; + +// Internal string representation. 
+class DJVUAPI GStringRep : public GPEnabled +{ +public: + enum EncodeType { XUCS4, XUCS4BE, XUCS4LE, XUCS4_2143, XUCS4_3412, + XUTF16, XUTF16BE, XUTF16LE, XUTF8, XEBCDIC, XOTHER } ; + + enum EscapeMode { UNKNOWN_ESCAPED=0, IS_ESCAPED=1, NOT_ESCAPED=2 }; + + class UTF8; + friend class UTF8; + class Unicode; + friend class Unicode; + + class ChangeLocale; +#if HAS_WCHAR + class Native; + friend class Native; +#endif // HAS_WCHAR + friend class GBaseString; + friend class GUTF8String; + friend class GNativeString; + friend DJVUAPI unsigned int hash(const GBaseString &ref); + +public: + // default constructor + GStringRep(void); + // virtual destructor + virtual ~GStringRep(); + + // Other virtual methods. + // Create an empty string. + virtual GP blank(const unsigned int sz) const = 0; + // Create a duplicate at the given size. + GP getbuf(int n) const; + // Change the value of one of the bytes. + GP setat(int n, char ch) const; + // Append a string. + virtual GP append(const GP &s2) const = 0; + // Test if isUTF8. + virtual bool isUTF8(void) const { return false; } + // Test if Native. + virtual bool isNative(void) const { return false; } + // Convert to Native. + virtual GP toNative( + const EscapeMode escape=UNKNOWN_ESCAPED ) const = 0; + // Convert to UTF8. + virtual GP toUTF8(const bool nothrow=false) const = 0; + // Convert to same as current class. + virtual GP toThis( + const GP &rep,const GP &locale=0) const = 0; + // Compare with #s2#. + virtual int cmp(const GP &s2,const int len=(-1)) const = 0; + + // Convert strings to numbers. 
+ virtual int toInt(void) const = 0; + virtual long int toLong( + const int pos, int &endpos, const int base=10) const = 0; + virtual unsigned long toULong( + const int pos, int &endpos, const int base=10) const = 0; + virtual double toDouble(const int pos, int &endpos) const = 0; + + // return the position of the next character + int nextChar( const int from=0 ) const; + + // return next non space position + int nextNonSpace( const int from=0, const int len=(-1) ) const; + + // return next white space position + int nextSpace( const int from=0, const int len=(-1) ) const; + + // return the position after the last non-whitespace character. + int firstEndSpace( int from=0, const int len=(-1) ) const; + + // Create an empty string. + template static GP create( + const unsigned int sz,TYPE *); + // Creates with a strdup string. + GP strdup(const char *s) const; + + // Creates by appending to the current string + GP append(const char *s2) const; + + // Creates with a concat operation. + GP concat(const GP &s1,const GP &s2) const; + GP concat(const char *s1,const GP &s2) const; + GP concat(const GP &s1,const char *s2) const; + GP concat(const char *s1,const char *s2) const; + + /* Creates with a strdup and substr. Negative values have strlen(s)+1 + added to them. + */ + GP substr( + const char *s,const int start,const int length=(-1)) const; + + GP substr( + const unsigned short *s,const int start,const int length=(-1)) const; + + GP substr( + const unsigned long *s,const int start,const int length=(-1)) const; + + /** Initializes a string with a formatted string (as in #vprintf#). The + string is re-initialized with the characters generated according to the + specified format #fmt# and using the optional arguments. See the ANSI-C + function #vprintf()# for more information. The current implementation + will cause a segmentation violation if the resulting string is longer + than 32768 characters. 
*/ + GP vformat(va_list args) const; + // -- SEARCHING + + static GP UTF8ToNative( const char *s, + const EscapeMode escape=UNKNOWN_ESCAPED ); + static GP NativeToUTF8( const char *s ); + + // Creates an uppercase version of the current string. + GP upcase(void) const; + // Creates a lowercase version of the current string. + GP downcase(void) const; + + /** Returns the next UCS4 character, and updates the pointer s. */ + static unsigned long UTF8toUCS4( + unsigned char const *&s, void const * const endptr ); + + /** Returns the number of bytes in next UCS4 character, + and sets #w# to the next UCS4 chacter. */ + static int UTF8toUCS4( + unsigned long &w, unsigned char const s[], void const * const endptr ) + { unsigned char const *r=s;w=UTF8toUCS4(r,endptr);return (int)((size_t)r-(size_t)s); } + + /** Returns the next UCS4 word from the UTF16 string. */ + static int UTF16toUCS4( + unsigned long &w, unsigned short const * const s,void const * const eptr); + + static int UCS4toUTF16( + unsigned long w, unsigned short &w1, unsigned short &w2); + + int cmp(const char *s2, const int len=(-1)) const; + static int cmp( + const GP &s1, const GP &s2, const int len=(-1)) ; + static int cmp( + const GP &s1, const char *s2, const int len=(-1)); + static int cmp( + const char *s1, const GP &s2, const int len=(-1)); + static int cmp( + const char *s1, const char *s2, const int len=(-1)); + + // Lookup the next character, and return the position of the next character. 
+ int getUCS4(unsigned long &w, const int from) const; + + virtual unsigned char *UCS4toString( + const unsigned long w, unsigned char *ptr, mbstate_t *ps=0) const = 0; + + static unsigned char *UCS4toUTF8( + const unsigned long w,unsigned char *ptr); + + static unsigned char *UCS4toNative( + const unsigned long w,unsigned char *ptr, mbstate_t *ps); + + int search(char c, int from=0) const; + + int search(char const *str, int from=0) const; + + int rsearch(char c, int from=0) const; + + int rsearch(char const *str, int from=0) const; + + int contains(char const accept[], int from=0) const; + + int rcontains(char const accept[], int from=0) const; + +protected: + // Return the next character and increment the source pointer. + virtual unsigned long getValidUCS4(const char *&source) const = 0; + + GP tocase( + bool (*xiswcase)(const unsigned long wc), + unsigned long (*xtowcase)(const unsigned long wc)) const; + + // Tests if the specified character passes the xiswtest. If so, the + // return pointer is incremented to the next character, otherwise the + // specified #ptr# is returned. + const char * isCharType( bool (*xiswtest)(const unsigned long wc), const char *ptr, + const bool reverse=false) const; + + // Find the next character position that passes the isCharType test. 
+ int nextCharType( + bool (*xiswtest)(const unsigned long wc),const int from,const int len, + const bool reverse=false) const; + + static bool giswspace(const unsigned long w); + static bool giswupper(const unsigned long w); + static bool giswlower(const unsigned long w); + static unsigned long gtowupper(const unsigned long w); + static unsigned long gtowlower(const unsigned long w); + + virtual void set_remainder( void const * const buf, const unsigned int size, + const EncodeType encodetype); + virtual void set_remainder( void const * const buf, const unsigned int size, + const GP &encoding ); + virtual void set_remainder ( const GP &remainder ); + + virtual GP get_remainder( void ) const; + +public: + /* Returns a copy of this string with characters used in XML with + '<' to "<", '>' to ">", '&' to "&" '\'' to + "'", and '\"' to """. Characters 0x01 through + 0x1f are also escaped. */ + GP toEscaped( const bool tosevenbit ) const; + + // Tests if a string is legally encoded in the current character set. + virtual bool is_valid(void) const = 0; + + virtual int ncopy(wchar_t * const buf, const int buflen) const = 0; + +protected: + +// Actual string data. + int size; + char *data; +}; + +class DJVUAPI GStringRep::UTF8 : public GStringRep +{ +public: + // default constructor + UTF8(void); + // virtual destructor + virtual ~UTF8(); + + // Other virtual methods. + virtual GP blank(const unsigned int sz = 0) const; + virtual GP append(const GP &s2) const; + // Test if Native. + virtual bool isUTF8(void) const; + // Convert to Native. + virtual GP toNative( + const EscapeMode escape=UNKNOWN_ESCAPED) const; + // Convert to UTF8. + virtual GP toUTF8(const bool nothrow=false) const; + // Convert to same as current class. + virtual GP toThis( + const GP &rep,const GP &) const; + // Compare with #s2#. + virtual int cmp(const GP &s2,const int len=(-1)) const; + + static GP create(const unsigned int sz = 0); + + // Convert strings to numbers. 
+ virtual int toInt(void) const; + virtual long int toLong( + const int pos, int &endpos, const int base=10) const; + virtual unsigned long toULong( + const int pos, int &endpos, const int base=10) const; + virtual double toDouble( + const int pos, int &endpos) const; + + // Create a strdup string. + static GP create(const char *s); + + // Creates with a concat operation. + static GP create( + const GP &s1,const GP &s2); + static GP create( const GP &s1,const char *s2); + static GP create( const char *s1, const GP &s2); + static GP create( const char *s1,const char *s2); + + // Create with a strdup and substr operation. + static GP create( + const char *s,const int start,const int length=(-1)); + + static GP create( + const unsigned short *s,const int start,const int length=(-1)); + + static GP create( + const unsigned long *s,const int start,const int length=(-1)); + + static GP create_format(const char fmt[],...); + static GP create(const char fmt[],va_list& args); + + virtual unsigned char *UCS4toString( + const unsigned long w,unsigned char *ptr, mbstate_t *ps=0) const; + + // Tests if a string is legally encoded in the current character set. + virtual bool is_valid(void) const; + + virtual int ncopy(wchar_t * const buf, const int buflen) const; + + friend class GBaseString; + +protected: + // Return the next character and increment the source pointer. + virtual unsigned long getValidUCS4(const char *&source) const; +}; + + +/** General purpose character string. + Each dirivied instance of class #GBaseString# represents a + character string. Overloaded operators provide a value semantic + to #GBaseString# objects. Conversion operators and constructors + transparently convert between #GBaseString# objects and + #const char*# pointers. The #GBaseString# class has no public + constructors, since a dirived type should always be used + to specify the desired multibyte character encoding. 
+ + Functions taking strings as arguments should declare their + arguments as "#const char*#". Such functions will work equally + well with dirived #GBaseString# objects since there is a fast + conversion operator from the dirivied #GBaseString# objects + to "#const char*#". Functions returning strings should return + #GUTF8String# or #GNativeString# objects because the class will + automatically manage the necessary memory. + + Characters in the string can be identified by their position. The + first character of a string is numbered zero. Negative positions + represent characters relative to the end of the string (i.e. + position #-1# accesses the last character of the string, + position #-2# represents the second last character, etc.) */ + +class DJVUAPI GBaseString : protected GP +{ +public: + enum EscapeMode { + UNKNOWN_ESCAPED=GStringRep::UNKNOWN_ESCAPED, + IS_ESCAPED=GStringRep::IS_ESCAPED, + NOT_ESCAPED=GStringRep::NOT_ESCAPED }; + + friend class GUTF8String; + friend class GNativeString; +protected: + // Sets the gstr pointer; + inline void init(void); + + ~GBaseString(); + inline GBaseString &init(const GP &rep); + + // -- CONSTRUCTORS + /** Null constructor. Constructs an empty string. */ + GBaseString( void ); + +public: + // -- ACCESS + /** Converts a string into a constant null terminated character + array. This conversion operator is very efficient because + it simply returns a pointer to the internal string data. The + returned pointer remains valid as long as the string is + unmodified. */ + operator const char* ( void ) const ; + /// Returns the string length. + unsigned int length( void ) const; + /** Returns true if and only if the string contains zero characters. + This operator is useful for conditional expression in control + structures. + \begin{verbatim} + if (! str) { ... } + while (!! str) { ... } -- Note the double operator! 
+ \end{verbatim} + Class #GBaseString# does not to support syntax + "#if# #(str)# #{}#" because the required conversion operator + introduces dangerous ambiguities with certain compilers. */ + bool operator! ( void ) const; + + // -- INDEXING + /** Returns the character at position #n#. An exception + \Ref{GException} is thrown if number #n# is not in range #-len# + to #len-1#, where #len# is the length of the string. The first + character of a string is numbered zero. Negative positions + represent characters relative to the end of the string. */ + char operator[] (int n) const; + /// Returns #TRUE# if the string contains an integer number. + bool is_int(void) const; + /// Returns #TRUE# if the string contains a float number. + bool is_float(void) const; + + /** Converts strings between native & UTF8 **/ + GNativeString getUTF82Native( EscapeMode escape=UNKNOWN_ESCAPED ) const; + GUTF8String getNative2UTF8( void ) const; + + // -- ALTERING + /// Reinitializes a string with the null string. + void empty( void ); + // -- SEARCHING + /** Searches character #c# in the string, starting at position + #from# and scanning forward until reaching the end of the + string. This function returns the position of the matching + character. It returns #-1# if character #c# cannot be found. */ + int search(char c, int from=0) const; + + /** Searches sub-string #str# in the string, starting at position + #from# and scanning forward until reaching the end of the + string. This function returns the position of the first + matching character of the sub-string. It returns #-1# if + string #str# cannot be found. */ + int search(const char *str, int from=0) const; + + /** Searches character #c# in the string, starting at position + #from# and scanning backwards until reaching the beginning of + the string. This function returns the position of the matching + character. It returns #-1# if character #c# cannot be found. 
*/ + int rsearch(char c, const int from=0) const; + /** Searches sub-string #str# in the string, starting at position + #from# and scanning backwards until reaching the beginning of + the string. This function returns the position of the first + matching character of the sub-string. It returns #-1# if + string #str# cannot be found. */ + int rsearch(const char *str, const int from=0) const; + /** Searches for any of the specified characters in the accept + string. It returns #-1# if the none of the characters and + be found, otherwise the position of the first match. */ + int contains(const char accept[], const int from=0) const; + /** Searches for any of the specified characters in the accept + string. It returns #-1# if the none of the characters and be + found, otherwise the position of the last match. */ + int rcontains(const char accept[], const int from=0) const; + + /** Concatenates strings. Returns a string composed by concatenating + the characters of strings #s1# and #s2#. */ + GUTF8String operator+(const GUTF8String &s2) const; + GNativeString operator+(const GNativeString &s2) const; + + /** Returns an integer. Implements i18n atoi. */ + int toInt(void) const; + + /** Returns a long intenger. Implments i18n strtol. */ + long toLong(const int pos, int &endpos, const int base=10) const; + + /** Returns a unsigned long integer. Implements i18n strtoul. */ + unsigned long toULong( + const int pos, int &endpos, const int base=10) const; + + /** Returns a double. Implements the i18n strtod. */ + double toDouble( + const int pos, int &endpos ) const; + + /** Returns a long intenger. Implments i18n strtol. */ + static long toLong( + const GUTF8String& src, const int pos, int &endpos, const int base=10); + + static unsigned long toULong( + const GUTF8String& src, const int pos, int &endpos, const int base=10); + + static double toDouble( + const GUTF8String& src, const int pos, int &endpos); + + /** Returns a long intenger. Implments i18n strtol. 
*/ + static long toLong( + const GNativeString& src, const int pos, int &endpos, const int base=10); + + static unsigned long toULong( + const GNativeString& src, const int pos, int &endpos, const int base=10); + + static double toDouble( + const GNativeString& src, const int pos, int &endpos); + + // -- HASHING + + // -- COMPARISONS + /** Returns an #int#. Compares string with #s2# and returns + sorting order. */ + int cmp(const GBaseString &s2, const int len=(-1)) const; + /** Returns an #int#. Compares string with #s2# and returns + sorting order. */ + int cmp(const char *s2, const int len=(-1)) const; + /** Returns an #int#. Compares string with #s2# and returns + sorting order. */ + int cmp(const char s2) const; + /** Returns an #int#. Compares #s2# with #s2# and returns + sorting order. */ + static int cmp(const char *s1, const char *s2, const int len=(-1)); + /** Returns a boolean. The Standard C strncmp takes two string and + compares the first N characters. static bool GBaseString::ncmp + will compare #s1# with #s2# with the #len# characters starting + from the beginning of the string. */ + /** String comparison. Returns true if and only if character + strings #s1# and #s2# are equal (as with #strcmp#.) + */ + bool operator==(const GBaseString &s2) const; + bool operator==(const char *s2) const; + friend bool operator==(const char *s1, const GBaseString &s2); + + /** String comparison. Returns true if and only if character + strings #s1# and #s2# are not equal (as with #strcmp#.) + */ + bool operator!=(const GBaseString &s2) const; + bool operator!=(const char *s2) const; + friend bool operator!=(const char *s1, const GBaseString &s2); + + /** String comparison. Returns true if and only if character + strings #s1# is lexicographically greater than or equal to + string #s2# (as with #strcmp#.) 
*/ + bool operator>=(const GBaseString &s2) const; + bool operator>=(const char *s2) const; + bool operator>=(const char s2) const; + friend bool operator>=(const char *s1, const GBaseString &s2); + friend bool operator>=(const char s1, const GBaseString &s2); + + /** String comparison. Returns true if and only if character + strings #s1# is lexicographically less than string #s2# + (as with #strcmp#.) + */ + bool operator<(const GBaseString &s2) const; + bool operator<(const char *s2) const; + bool operator<(const char s2) const; + friend bool operator<(const char *s1, const GBaseString &s2); + friend bool operator<(const char s1, const GBaseString &s2); + + /** String comparison. Returns true if and only if character + strings #s1# is lexicographically greater than string #s2# + (as with #strcmp#.) + */ + bool operator> (const GBaseString &s2) const; + bool operator> (const char *s2) const; + bool operator> (const char s2) const; + friend bool operator> (const char *s1, const GBaseString &s2); + friend bool operator> (const char s1, const GBaseString &s2); + + /** String comparison. Returns true if and only if character + strings #s1# is lexicographically less than or equal to string + #s2# (as with #strcmp#.) + */ + bool operator<=(const GBaseString &s2) const; + bool operator<=(const char *s2) const; + bool operator<=(const char s2) const; + friend bool operator<=(const char *s1, const GBaseString &s2); + friend bool operator<=(const char s1, const GBaseString &s2); + + /** Returns an integer. Implements a functional i18n atoi. Note + that if you pass a GBaseString that is not in Native format + the results may be disparaging. */ + + /** Returns a hash code for the string. This hashing function + helps when creating associative maps with string keys (see + \Ref{GMap}). This hash code may be reduced to an arbitrary + range by computing its remainder modulo the upper bound of + the range. 
*/ + friend DJVUAPI unsigned int hash(const GBaseString &ref); + // -- HELPERS + friend class GStringRep; + + /// Returns next non space position. + int nextNonSpace( const int from=0, const int len=(-1) ) const; + + /// Returns next character position. + int nextChar( const int from=0 ) const; + + /// Returns next non space position. + int nextSpace( const int from=0, const int len=(-1) ) const; + + /// return the position after the last non-whitespace character. + int firstEndSpace( const int from=0,const int len=(-1) ) const; + + /// Tests if the string is legally encoded in the current codepage. + bool is_valid(void) const; + + /// copy to a wchar_t buffer + int ncopy(wchar_t * const buf, const int buflen) const; + +protected: + const char *gstr; + static void throw_illegal_subscript() no_return; + static const char *nullstr; +public: + GNativeString UTF8ToNative( + const bool currentlocale=false, + const EscapeMode escape=UNKNOWN_ESCAPED) const; + GUTF8String NativeToUTF8(void) const; +protected: + inline int CheckSubscript(int n) const; +}; + +/** General purpose character string. + Each instance of class #GUTF8String# represents a character + string. Overloaded operators provide a value semantic to + #GUTF8String# objects. Conversion operators and constructors + transparently convert between #GUTF8String# objects and + #const char*# pointers. + + Functions taking strings as arguments should declare their + arguments as "#const char*#". Such functions will work equally + well with #GUTF8String# objects since there is a fast conversion + operator from #GUTF8String# to "#const char*#". Functions + returning strings should return #GUTF8String# or #GNativeString# + objects because the class will automatically manage the necessary + memory. + + Characters in the string can be identified by their position. The + first character of a string is numbered zero. Negative positions + represent characters relative to the end of the string (i.e. 
+ position #-1# accesses the last character of the string, + position #-2# represents the second last character, etc.) */ + +class DJVUAPI GUTF8String : public GBaseString +{ +public: + ~GUTF8String(); + inline void init(void); + + inline GUTF8String &init(const GP &rep); + + // -- CONSTRUCTORS + /** Null constructor. Constructs an empty string. */ + GUTF8String(void); + /// Constructs a string from a character. + GUTF8String(const char dat); + /// Constructs a string from a null terminated character array. + GUTF8String(const char *str); + /// Constructs a string from a null terminated character array. + GUTF8String(const unsigned char *str); + GUTF8String(const unsigned short *dat); + GUTF8String(const unsigned long *dat); + /** Constructs a string from a character array. Elements of the + character array #dat# are added into the string until the + string length reaches #len# or until encountering a null + character (whichever comes first). */ + GUTF8String(const char *dat, unsigned int len); + GUTF8String(const unsigned short *dat, unsigned int len); + GUTF8String(const unsigned long *dat, unsigned int len); + + /// Construct from base class. + GUTF8String(const GP &str); + GUTF8String(const GBaseString &str); + GUTF8String(const GUTF8String &str); + GUTF8String(const GNativeString &str); + /** Constructs a string from a character array. Elements of the + character array #dat# are added into the string until the + string length reaches #len# or until encountering a null + character (whichever comes first). */ + GUTF8String(const GBaseString &gs, int from, int len); + + /** Copy a null terminated character array. Resets this string + with the character string contained in the null terminated + character array #str#. 
*/ + GUTF8String& operator= (const char str); + GUTF8String& operator= (const char *str); + inline GUTF8String& operator= (const GP &str); + inline GUTF8String& operator= (const GBaseString &str); + inline GUTF8String& operator= (const GUTF8String &str); + inline GUTF8String& operator= (const GNativeString &str); + + /** Constructs a string with a formatted string (as in #vprintf#). + The string is re-initialized with the characters generated + according to the specified format #fmt# and using the optional + arguments. See the ANSI-C function #vprintf()# for more + information. The current implementation will cause a + segmentation violation if the resulting string is longer + than 32768 characters. */ + GUTF8String(const GUTF8String &fmt, va_list &args); + + /// Constructs a string from a character. + /** Constructs a string with a human-readable representation of + integer #number#. The format is similar to format #"%d"# in + function #printf#. */ + GUTF8String(const int number); + + /** Constructs a string with a human-readable representation of + floating point number #number#. The format is similar to + format #"%f"# in function #printf#. */ + GUTF8String(const double number); + + + /** Initializes a string with a formatted string (as in #printf#). + The string is re-initialized with the characters generated + according to the specified format #fmt# and using the optional + arguments. See the ANSI-C function #printf()# for more + information. The current implementation will cause a + segmentation violation if the resulting string is longer + than 32768 characters. */ + GUTF8String &format(const char *fmt, ... ); + /** Initializes a string with a formatted string (as in #vprintf#). + The string is re-initialized with the characters generated + according to the specified format #fmt# and using the optional + arguments. See the ANSI-C function #vprintf()# for more + information. 
The current implementation will cause a + segmentation violation if the resulting string is longer + than 32768 characters. */ + GUTF8String &vformat(const GUTF8String &fmt, va_list &args); + + /** Returns a copy of this string with characters used in XML with + '<' to "<", '>' to ">", '&' to "&" '\'' to + "'", and '\"' to """. Characters 0x01 through + 0x1f are also escaped. */ + GUTF8String toEscaped( const bool tosevenbit=false ) const; + + /** Converts strings containing HTML/XML escaped characters into + their unescaped forms. Numeric representations of characters + (e.g., "&" or "&" for "*") are the only forms + converted by this function. */ + GUTF8String fromEscaped( void ) const; + + /** Converts strings containing HTML/XML escaped characters + (e.g., "<" for "<") into their unescaped forms. The + conversion is partially defined by the ConvMap argument which + specifies the conversion strings to be recognized. Numeric + representations of characters (e.g., "&" or "&" + for "*") are always converted. */ + GUTF8String fromEscaped( + const GMap ConvMap ) const; + + + // -- CONCATENATION + /// Appends character #ch# to the string. + GUTF8String& operator+= (char ch); + + /// Appends the null terminated character array #str# to the string. + GUTF8String& operator+= (const char *str); + /// Appends the specified GBaseString to the string. + GUTF8String& operator+= (const GBaseString &str); + + /** Returns a sub-string. The sub-string is composed by copying + #len# characters starting at position #from# in this string. + The length of the resulting string may be smaller than #len# + if the specified range is too large. */ + GUTF8String substr(int from, int len/*=(-1)*/) const; + + /** Returns an upper case copy of this string. The returned string + contains a copy of the current string with all letters turned + into upper case letters. */ + GUTF8String upcase( void ) const; + /** Returns an lower case copy of this string. 
The returned string + contains a copy of the current string with all letters turned + into lower case letters. */ + GUTF8String downcase( void ) const; + + /** Concatenates strings. Returns a string composed by concatenating + the characters of strings #s1# and #s2#. + */ + GUTF8String operator+(const GBaseString &s2) const; + GUTF8String operator+(const GUTF8String &s2) const; + GUTF8String operator+(const GNativeString &s2) const; + GUTF8String operator+(const char *s2) const; + friend DJVUAPI GUTF8String operator+(const char *s1, const GUTF8String &s2); + + /** Provides direct access to the string buffer. Returns a + pointer for directly accessing the string buffer. This pointer + remains valid as long as the string is not modified by + other means. Positive values for argument #n# represent the + length of the returned buffer. The returned string buffer will + be large enough to hold at least #n# characters plus a null + character. If #n# is positive but smaller than the string + length, the string will be truncated to #n# characters. */ + char *getbuf(int n = -1); + /** Set the character at position #n# to value #ch#. An exception + \Ref{GException} is thrown if number #n# is not in range #-len# + to #len#, where #len# is the length of the string. If character + #ch# is zero, the string is truncated at position #n#. The + first character of a string is numbered zero. Negative + positions represent characters relative to the end of the + string. If position #n# is equal to the length of the string, + this function appends character #ch# to the end of the string. 
*/ + void setat(const int n, const char ch); +public: + typedef enum GStringRep::EncodeType EncodeType; + static GUTF8String create(void const * const buf, + const unsigned int size, + const EncodeType encodetype, const GUTF8String &encoding); + static GUTF8String create( void const * const buf, + unsigned int size, const EncodeType encodetype ); + static GUTF8String create( void const * const buf, + const unsigned int size, const GUTF8String &encoding ); + static GUTF8String create( void const * const buf, + const unsigned int size, const GP &remainder); + GP get_remainder(void) const; + static GUTF8String create( const char *buf, const unsigned int bufsize ); + static GUTF8String create( const unsigned short *buf, const unsigned int bufsize ); + static GUTF8String create( const unsigned long *buf, const unsigned int bufsize ); +}; + + +#if !HAS_WCHAR +#define GBaseString GUTF8String +#endif + +/** General purpose character string. + Each instance of class #GNativeString# represents a character + string. Overloaded operators provide a value semantic to + #GNativeString# objects. Conversion operators and constructors + transparently convert between #GNativeString# objects and + #const char*# pointers. + + Functions taking strings as arguments should declare their + arguments as "#const char*#". Such functions will work equally + well with #GNativeString# objects since there is a fast conversion + operator from #GNativeString# to "#const char*#". Functions + returning strings should return #GUTF8String# or #GNativeString# + objects because the class will automatically manage the necessary + memory. + + Characters in the string can be identified by their position. The + first character of a string is numbered zero. Negative positions + represent characters relative to the end of the string (i.e. + position #-1# accesses the last character of the string, + position #-2# represents the second last character, etc.) 
*/ + +class DJVUAPI GNativeString : public GBaseString +{ +public: + ~GNativeString(); + // -- CONSTRUCTORS + /** Null constructor. Constructs an empty string. */ + GNativeString(void); + /// Constructs a string from a character. + GNativeString(const char dat); + /// Constructs a string from a null terminated character array. + GNativeString(const char *str); + /// Constructs a string from a null terminated character array. + GNativeString(const unsigned char *str); + GNativeString(const unsigned short *str); + GNativeString(const unsigned long *str); + /** Constructs a string from a character array. Elements of the + character array #dat# are added into the string until the + string length reaches #len# or until encountering a null + character (whichever comes first). */ + GNativeString(const char *dat, unsigned int len); + GNativeString(const unsigned short *dat, unsigned int len); + GNativeString(const unsigned long *dat, unsigned int len); + /// Construct from base class. + GNativeString(const GP &str); + GNativeString(const GBaseString &str); +#if HAS_WCHAR + GNativeString(const GUTF8String &str); +#endif + GNativeString(const GNativeString &str); + /** Constructs a string from a character array. Elements of the + character array #dat# are added into the string until the + string length reaches #len# or until encountering a null + character (whichever comes first). */ + GNativeString(const GBaseString &gs, int from, int len); + + /** Constructs a string with a formatted string (as in #vprintf#). + The string is re-initialized with the characters generated + according to the specified format #fmt# and using the optional + arguments. See the ANSI-C function #vprintf()# for more + information. The current implementation will cause a + segmentation violation if the resulting string is longer than + 32768 characters. */ + GNativeString(const GNativeString &fmt, va_list &args); + + /** Constructs a string with a human-readable representation of + integer #number#. 
The format is similar to format #"%d"# in + function #printf#. */ + GNativeString(const int number); + + /** Constructs a string with a human-readable representation of + floating point number #number#. The format is similar to + format #"%f"# in function #printf#. */ + GNativeString(const double number); + +#if !HAS_WCHAR +#undef GBaseString +#else + /// Initialize this string class + void init(void); + + /// Initialize this string class + GNativeString &init(const GP &rep); + + /** Copy a null terminated character array. Resets this string with + the character string contained in the null terminated character + array #str#. */ + GNativeString& operator= (const char str); + GNativeString& operator= (const char *str); + inline GNativeString& operator= (const GP &str); + inline GNativeString& operator= (const GBaseString &str); + inline GNativeString& operator= (const GUTF8String &str); + inline GNativeString& operator= (const GNativeString &str); + // -- CONCATENATION + /// Appends character #ch# to the string. + GNativeString& operator+= (char ch); + /// Appends the null terminated character array #str# to the string. + GNativeString& operator+= (const char *str); + /// Appends the specified GBaseString to the string. + GNativeString& operator+= (const GBaseString &str); + + /** Returns a sub-string. The sub-string is composed by copying + #len# characters starting at position #from# in this string. + The length of the resulting string may be smaller than #len# + if the specified range is too large. */ + GNativeString substr(int from, int len/*=(-1)*/) const; + + /** Returns an upper case copy of this string. The returned + string contains a copy of the current string with all letters + turned into upper case letters. */ + GNativeString upcase( void ) const; + /** Returns a lower case copy of this string. The returned + string contains a copy of the current string with all letters + turned into lower case letters. 
*/ + GNativeString downcase( void ) const; + + + GNativeString operator+(const GBaseString &s2) const; + GNativeString operator+(const GNativeString &s2) const; + GUTF8String operator+(const GUTF8String &s2) const; + GNativeString operator+(const char *s2) const; + friend DJVUAPI GNativeString operator+(const char *s1, const GNativeString &s2); + + /** Initializes a string with a formatted string (as in #printf#). + The string is re-initialized with the characters generated + according to the specified format #fmt# and using the optional + arguments. See the ANSI-C function #printf()# for more + information. The current implementation will cause a + segmentation violation if the resulting string is longer than + 32768 characters. */ + GNativeString &format(const char *fmt, ... ); + /** Initializes a string with a formatted string (as in #vprintf#). + The string is re-initialized with the characters generated + according to the specified format #fmt# and using the optional + arguments. See the ANSI-C function #vprintf()# for more + information. The current implementation will cause a + segmentation violation if the resulting string is longer than + 32768 characters. */ + GNativeString &vformat(const GNativeString &fmt, va_list &args); + + /** Returns a copy of this string with characters used in XML with + '<' to "&lt;", '>' to "&gt;", '&' to "&amp;", '\'' to + "&apos;", and '\"' to "&quot;". Characters 0x01 through + 0x1f are also escaped. */ + GNativeString toEscaped( const bool tosevenbit=false ) const; + + + /** Provides direct access to the string buffer. Returns a + pointer for directly accessing the string buffer. This + pointer remains valid as long as the string is not + modified by other means. Positive values for argument #n# + represent the length of the returned buffer. The returned + string buffer will be large enough to hold at least #n# + characters plus a null character. 
If #n# is positive but + smaller than the string length, the string will be truncated + to #n# characters. */ + char *getbuf(int n = -1); + /** Set the character at position #n# to value #ch#. An exception + \Ref{GException} is thrown if number #n# is not in range #-len# + to #len#, where #len# is the length of the string. If + character #ch# is zero, the string is truncated at position + #n#. The first character of a string is numbered zero. + Negative positions represent characters relative to the end of + the string. If position #n# is equal to the length of the + string, this function appends character #ch# to the end of the + string. */ + void setat(const int n, const char ch); + + static GNativeString create( const char *buf, const unsigned int bufsize ); + static GNativeString create( const unsigned short *buf, const unsigned int bufsize ); + static GNativeString create( const unsigned long *buf, const unsigned int bufsize ); +#endif // WinCE +}; + +//@} + +inline +GBaseString::operator const char* ( void ) const +{ + return ptr?(*this)->data:nullstr; +} + +inline unsigned int +GBaseString::length( void ) const +{ + return ptr ? (*this)->size : 0; +} + +inline bool +GBaseString::operator! 
( void ) const +{ + return !ptr; +} + +inline GUTF8String +GUTF8String::upcase( void ) const +{ + if (ptr) return (*this)->upcase(); + return *this; +} + +inline GUTF8String +GUTF8String::downcase( void ) const +{ + if (ptr) return (*this)->downcase(); + return *this; +} + +inline void +GUTF8String::init(void) +{ GBaseString::init(); } + +inline GUTF8String & +GUTF8String::init(const GP &rep) +{ GP::operator=(rep?rep->toUTF8(true):rep); init(); return *this; } + +inline GUTF8String & +GUTF8String::vformat(const GUTF8String &fmt, va_list &args) +{ return (*this = (fmt.ptr?GUTF8String(fmt,args):fmt)); } + +inline GUTF8String +GUTF8String::toEscaped( const bool tosevenbit ) const +{ return ptr?GUTF8String((*this)->toEscaped(tosevenbit)):(*this); } + +inline GP +GUTF8String::get_remainder(void) const +{ + GP retval; + if(ptr) + retval=((*this)->get_remainder()); + return retval; +} + +inline +GUTF8String::GUTF8String(const GNativeString &str) +{ init(str.length()?(str->toUTF8(true)):(GP)str); } + +inline +GUTF8String::GUTF8String(const GP &str) +{ init(str?(str->toUTF8(true)):str); } + +inline +GUTF8String::GUTF8String(const GBaseString &str) +{ init(str.length()?(str->toUTF8(true)):(GP)str); } + +inline void +GBaseString::init(void) +{ + gstr=ptr?((*this)->data):nullstr; +} +/** Returns an integer. Implements i18n atoi. */ +inline int +GBaseString::toInt(void) const +{ return ptr?(*this)->toInt():0; } + +/** Returns a long integer. Implements i18n strtol. 
*/ +inline long +GBaseString::toLong(const int pos, int &endpos, const int base) const +{ + long int retval=0; + if(ptr) + { + retval=(*this)->toLong(pos, endpos, base); + }else + { + endpos=(-1); + } + return retval; +} + +inline long +GBaseString::toLong( + const GUTF8String& src, const int pos, int &endpos, const int base) +{ + return src.toLong(pos,endpos,base); +} + +inline long +GBaseString::toLong( + const GNativeString& src, const int pos, int &endpos, const int base) +{ + return src.toLong(pos,endpos,base); +} + +/** Returns an unsigned long integer. Implements i18n strtoul. */ +inline unsigned long +GBaseString::toULong(const int pos, int &endpos, const int base) const +{ + unsigned long retval=0; + if(ptr) + { + retval=(*this)->toULong(pos, endpos, base); + }else + { + endpos=(-1); + } + return retval; +} + +inline unsigned long +GBaseString::toULong( + const GUTF8String& src, const int pos, int &endpos, const int base) +{ + return src.toULong(pos,endpos,base); +} + +inline unsigned long +GBaseString::toULong( + const GNativeString& src, const int pos, int &endpos, const int base) +{ + return src.toULong(pos,endpos,base); +} + +/** Returns a double. Implements the i18n strtod. 
*/ +inline double +GBaseString::toDouble( + const int pos, int &endpos ) const +{ + double retval=(double)0; + if(ptr) + { + retval=(*this)->toDouble(pos, endpos); + }else + { + endpos=(-1); + } + return retval; +} + +inline double +GBaseString::toDouble( + const GUTF8String& src, const int pos, int &endpos) +{ + return src.toDouble(pos,endpos); +} + +inline double +GBaseString::toDouble( + const GNativeString& src, const int pos, int &endpos) +{ + return src.toDouble(pos,endpos); +} + +inline GBaseString & +GBaseString::init(const GP &rep) +{ GP::operator=(rep); init(); return *this;} + +inline char +GBaseString::operator[] (int n) const +{ return ((n||ptr)?((*this)->data[CheckSubscript(n)]):0); } + +inline int +GBaseString::search(char c, int from) const +{ return ptr?((*this)->search(c,from)):(-1); } + +inline int +GBaseString::search(const char *str, int from) const +{ return ptr?((*this)->search(str,from)):(-1); } + +inline int +GBaseString::rsearch(char c, const int from) const +{ return ptr?((*this)->rsearch(c,from)):(-1); } + +inline int +GBaseString::rsearch(const char *str, const int from) const +{ return ptr?((*this)->rsearch(str,from)):(-1); } + +inline int +GBaseString::contains(const char accept[], const int from) const +{ return ptr?((*this)->contains(accept,from)):(-1); } + +inline int +GBaseString::rcontains(const char accept[], const int from) const +{ return ptr?((*this)->rcontains(accept,from)):(-1); } + +inline int +GBaseString::cmp(const GBaseString &s2, const int len) const +{ return GStringRep::cmp(*this,s2,len); } + +inline int +GBaseString::cmp(const char *s2, const int len) const +{ return GStringRep::cmp(*this,s2,len); } + +inline int +GBaseString::cmp(const char s2) const +{ return GStringRep::cmp(*this,&s2,1); } + +inline int +GBaseString::cmp(const char *s1, const char *s2, const int len) +{ return GStringRep::cmp(s1,s2,len); } + +inline bool +GBaseString::operator==(const GBaseString &s2) const +{ return !cmp(s2); } + +inline bool 
+GBaseString::operator==(const char *s2) const +{ return !cmp(s2); } + +inline bool +GBaseString::operator!=(const GBaseString &s2) const +{ return !!cmp(s2); } + +inline bool +GBaseString::operator!=(const char *s2) const +{ return !!cmp(s2); } + +inline bool +GBaseString::operator>=(const GBaseString &s2) const +{ return (cmp(s2)>=0); } + +inline bool +GBaseString::operator>=(const char *s2) const +{ return (cmp(s2)>=0); } + +inline bool +GBaseString::operator>=(const char s2) const +{ return (cmp(s2)>=0); } + +inline bool +GBaseString::operator<(const GBaseString &s2) const +{ return (cmp(s2)<0); } + +inline bool +GBaseString::operator<(const char *s2) const +{ return (cmp(s2)<0); } + +inline bool +GBaseString::operator<(const char s2) const +{ return (cmp(s2)<0); } + +inline bool +GBaseString::operator> (const GBaseString &s2) const +{ return (cmp(s2)>0); } + +inline bool +GBaseString::operator> (const char *s2) const +{ return (cmp(s2)>0); } + +inline bool +GBaseString::operator> (const char s2) const +{ return (cmp(s2)>0); } + +inline bool +GBaseString::operator<=(const GBaseString &s2) const +{ return (cmp(s2)<=0); } + +inline bool +GBaseString::operator<=(const char *s2) const +{ return (cmp(s2)<=0); } + +inline bool +GBaseString::operator<=(const char s2) const +{ return (cmp(s2)<=0); } + +inline int +GBaseString::nextNonSpace( const int from, const int len ) const +{ return ptr?(*this)->nextNonSpace(from,len):0; } + +inline int +GBaseString::nextChar( const int from ) const +{ return ptr?(*this)->nextChar(from):0; } + +inline int +GBaseString::nextSpace( const int from, const int len ) const +{ return ptr?(*this)->nextSpace(from,len):0; } + +inline int +GBaseString::firstEndSpace( const int from,const int len ) const +{ return ptr?(*this)->firstEndSpace(from,len):0; } + +inline bool +GBaseString::is_valid(void) const +{ return ptr?((*this)->is_valid()):true; } + +inline int +GBaseString::ncopy(wchar_t * const buf, const int buflen) const 
+{if(buf&&buflen)buf[0]=0;return ptr?((*this)->ncopy(buf,buflen)):0;} + +inline int +GBaseString::CheckSubscript(int n) const +{ + if(n) + { + if (n<0 && ptr) + n += (*this)->size; + if (n<0 || !ptr || n > (int)(*this)->size) + throw_illegal_subscript(); + } + return n; +} + +inline GBaseString::GBaseString(void) { init(); } + +inline GUTF8String::GUTF8String(void) { } + +inline GUTF8String::GUTF8String(const GUTF8String &str) +{ init(str); } + +inline GUTF8String& GUTF8String::operator= (const GP &str) +{ return init(str); } + +inline GUTF8String& GUTF8String::operator= (const GBaseString &str) +{ return init(str); } + +inline GUTF8String& GUTF8String::operator= (const GUTF8String &str) +{ return init(str); } + +inline GUTF8String& GUTF8String::operator= (const GNativeString &str) +{ return init(str); } + +inline GUTF8String +GUTF8String::create( const char *buf, const unsigned int bufsize ) +{ +#if HAS_WCHAR + return GNativeString(buf,bufsize); +#else + return GUTF8String(buf,bufsize); +#endif +} + +inline GUTF8String +GUTF8String::create( const unsigned short *buf, const unsigned int bufsize ) +{ + return GUTF8String(buf,bufsize); +} + +inline GUTF8String +GUTF8String::create( const unsigned long *buf, const unsigned int bufsize ) +{ + return GUTF8String(buf,bufsize); +} + +inline GNativeString::GNativeString(void) {} + +#if !HAS_WCHAR +// For Windows CE, GNativeString is essentially GUTF8String + +inline +GNativeString::GNativeString(const GUTF8String &str) +: GUTF8String(str) {} + +inline +GNativeString::GNativeString(const GP &str) +: GUTF8String(str) {} + +inline +GNativeString::GNativeString(const char dat) +: GUTF8String(dat) {} + +inline +GNativeString::GNativeString(const char *str) +: GUTF8String(str) {} + +inline +GNativeString::GNativeString(const unsigned char *str) +: GUTF8String(str) {} + +inline +GNativeString::GNativeString(const unsigned short *str) +: GUTF8String(str) {} + +inline +GNativeString::GNativeString(const unsigned long *str) +: 
GUTF8String(str) {} + +inline +GNativeString::GNativeString(const char *dat, unsigned int len) +: GUTF8String(dat,len) {} + +inline +GNativeString::GNativeString(const unsigned short *dat, unsigned int len) +: GUTF8String(dat,len) {} + +inline +GNativeString::GNativeString(const unsigned long *dat, unsigned int len) +: GUTF8String(dat,len) {} + +inline +GNativeString::GNativeString(const GNativeString &str) +: GUTF8String(str) {} + +inline +GNativeString::GNativeString(const int number) +: GUTF8String(number) {} + +inline +GNativeString::GNativeString(const double number) +: GUTF8String(number) {} + +inline +GNativeString::GNativeString(const GNativeString &fmt, va_list &args) +: GUTF8String(fmt,args) {} + +#else // HAS_WCHAR + +/// Initialize this string class +inline void +GNativeString::init(void) +{ GBaseString::init(); } + +/// Initialize this string class +inline GNativeString & +GNativeString::init(const GP &rep) +{ + GP::operator=(rep?rep->toNative(GStringRep::NOT_ESCAPED):rep); + init(); + return *this; +} + +inline GNativeString +GNativeString::substr(int from, int len) const +{ return GNativeString(*this, from, len); } + +inline GNativeString & +GNativeString::vformat(const GNativeString &fmt, va_list &args) +{ return (*this = (fmt.ptr?GNativeString(fmt,args):fmt)); } + +inline GNativeString +GNativeString::toEscaped( const bool tosevenbit ) const +{ return ptr?GNativeString((*this)->toEscaped(tosevenbit)):(*this); } + +inline +GNativeString::GNativeString(const GUTF8String &str) +{ + if (str.length()) + init(str->toNative(GStringRep::NOT_ESCAPED)); + else + init((GP)str); +} + +inline +GNativeString::GNativeString(const GP &str) +{ + if (str) + init(str->toNative(GStringRep::NOT_ESCAPED)); + else + init(str); +} + +inline +GNativeString::GNativeString(const GBaseString &str) +{ + if (str.length()) + init(str->toNative(GStringRep::NOT_ESCAPED)); + else + init((GP)str); +} + + +inline +GNativeString::GNativeString(const GNativeString &fmt, va_list &args) 
+{ + if (fmt.ptr) + init(fmt->vformat(args)); + else + init(fmt); +} + +inline GNativeString +GNativeString::create( const char *buf, const unsigned int bufsize ) +{ + return GNativeString(buf,bufsize); +} + +inline GNativeString +GNativeString::create( const unsigned short *buf, const unsigned int bufsize ) +{ + return GNativeString(buf,bufsize); +} + +inline GNativeString +GNativeString::create( const unsigned long *buf, const unsigned int bufsize ) +{ + return GNativeString(buf,bufsize); +} + +inline GNativeString& +GNativeString::operator= (const GP &str) +{ return init(str); } + +inline GNativeString& +GNativeString::operator= (const GBaseString &str) +{ return init(str); } + +inline GNativeString& +GNativeString::operator= (const GUTF8String &str) +{ return init(str); } + +inline GNativeString& +GNativeString::operator= (const GNativeString &str) +{ return init(str); } + +inline GNativeString +GNativeString::upcase( void ) const +{ + if (ptr) return (*this)->upcase(); + return *this; +} + +inline GNativeString +GNativeString::downcase( void ) const +{ + if (ptr) return (*this)->downcase(); + return *this; +} + +#endif // HAS_WCHAR + +inline bool +operator==(const char *s1, const GBaseString &s2) +{ return !s2.cmp(s1); } + +inline bool +operator!=(const char *s1, const GBaseString &s2) +{ return !!s2.cmp(s1); } + +inline bool +operator>=(const char *s1, const GBaseString &s2) +{ return (s2.cmp(s1)<=0); } + +inline bool +operator>=(const char s1, const GBaseString &s2) +{ return (s2.cmp(s1)<=0); } + +inline bool +operator<(const char *s1, const GBaseString &s2) +{ return (s2.cmp(s1)>0); } + +inline bool +operator<(const char s1, const GBaseString &s2) +{ return (s2.cmp(s1)>0); } + +inline bool +operator> (const char *s1, const GBaseString &s2) +{ return (s2.cmp(s1)<0); } + +inline bool +operator> (const char s1, const GBaseString &s2) +{ return (s2.cmp(s1)<0); } + +inline bool +operator<=(const char *s1, const GBaseString &s2) +{ return !(s1>s2); } + +inline 
bool +operator<=(const char s1, const GBaseString &s2) +{ return !(s1>s2); } + +// ------------------- The end + + +#ifdef HAVE_NAMESPACES +} +# ifndef NOT_USING_DJVU_NAMESPACE +using namespace DJVU; +# endif +#endif +#endif + diff --git a/DjVuFile/libdjvu/DjVuInfo.cpp b/DjVuFile/libdjvu/DjVuInfo.cpp index 607bbceb4e..74a9c19ab2 100644 --- a/DjVuFile/libdjvu/DjVuInfo.cpp +++ b/DjVuFile/libdjvu/DjVuInfo.cpp @@ -66,7 +66,7 @@ #include "DjVuInfo.h" #include "GException.h" #include "ByteStream.h" -#include "GString.h" +#include "DjVuGString.h" #ifdef HAVE_NAMESPACES diff --git a/DjVuFile/libdjvu/DjVuMessageLite.h b/DjVuFile/libdjvu/DjVuMessageLite.h index 598bc0de76..5cd63235f6 100644 --- a/DjVuFile/libdjvu/DjVuMessageLite.h +++ b/DjVuFile/libdjvu/DjVuMessageLite.h @@ -71,7 +71,7 @@ // and added support for non I18N messages. -#include "GString.h" +#include "DjVuGString.h" #ifdef HAVE_NAMESPACES namespace DJVU { diff --git a/DjVuFile/libdjvu/DjVuNavDir.h b/DjVuFile/libdjvu/DjVuNavDir.h index 6ecca4c123..f365ae6f83 100644 --- a/DjVuFile/libdjvu/DjVuNavDir.h +++ b/DjVuFile/libdjvu/DjVuNavDir.h @@ -66,7 +66,7 @@ #endif -#include "GString.h" +#include "DjVuGString.h" #include "GThreads.h" #include "GURL.h" diff --git a/DjVuFile/libdjvu/GBitmap.cpp b/DjVuFile/libdjvu/GBitmap.cpp index d25495b787..e0648b80cd 100644 --- a/DjVuFile/libdjvu/GBitmap.cpp +++ b/DjVuFile/libdjvu/GBitmap.cpp @@ -66,7 +66,7 @@ #include "GBitmap.h" #include "ByteStream.h" #include "GRect.h" -#include "GString.h" +#include "DjVuGString.h" #include "GThreads.h" #include "GException.h" #include diff --git a/DjVuFile/libdjvu/GIFFManager.h b/DjVuFile/libdjvu/GIFFManager.h index f8b55d0ecd..387af40363 100644 --- a/DjVuFile/libdjvu/GIFFManager.h +++ b/DjVuFile/libdjvu/GIFFManager.h @@ -70,7 +70,7 @@ #include "GContainer.h" #include "Arrays.h" #include "GSmartPointer.h" -#include "GString.h" +#include "DjVuGString.h" #ifdef HAVE_NAMESPACES namespace DJVU { diff --git a/DjVuFile/libdjvu/GMapAreas.h 
b/DjVuFile/libdjvu/GMapAreas.h index e27dddde19..8b2f17d02d 100644 --- a/DjVuFile/libdjvu/GMapAreas.h +++ b/DjVuFile/libdjvu/GMapAreas.h @@ -68,7 +68,7 @@ #include "GSmartPointer.h" #include "GContainer.h" -#include "GString.h" +#include "DjVuGString.h" #include "GRect.h" #include "GURL.h" diff --git a/DjVuFile/libdjvu/GOS.h b/DjVuFile/libdjvu/GOS.h index 085e22b6b0..44c2ec5c6b 100644 --- a/DjVuFile/libdjvu/GOS.h +++ b/DjVuFile/libdjvu/GOS.h @@ -87,7 +87,7 @@ //@{ #include "DjVuGlobal.h" -#include "GString.h" +#include "DjVuGString.h" #ifdef HAVE_NAMESPACES namespace DJVU { diff --git a/DjVuFile/libdjvu/GPixmap.cpp b/DjVuFile/libdjvu/GPixmap.cpp index 8d77ac95d4..26ff67a3ab 100644 --- a/DjVuFile/libdjvu/GPixmap.cpp +++ b/DjVuFile/libdjvu/GPixmap.cpp @@ -70,7 +70,7 @@ #include "GPixmap.h" -#include "GString.h" +#include "DjVuGString.h" #include "GException.h" #include "ByteStream.h" #include "GRect.h" diff --git a/DjVuFile/libdjvu/GURL.h b/DjVuFile/libdjvu/GURL.h index 92f9e367c4..dfd6d65534 100644 --- a/DjVuFile/libdjvu/GURL.h +++ b/DjVuFile/libdjvu/GURL.h @@ -66,7 +66,7 @@ #endif -#include "GString.h" +#include "DjVuGString.h" #include "Arrays.h" #include "GThreads.h" diff --git a/DjVuFile/libdjvu/GUnicode.cpp b/DjVuFile/libdjvu/GUnicode.cpp index 6bbf6b7734..21bf02d38d 100644 --- a/DjVuFile/libdjvu/GUnicode.cpp +++ b/DjVuFile/libdjvu/GUnicode.cpp @@ -63,7 +63,7 @@ # pragma implementation #endif -#include "GString.h" +#include "DjVuGString.h" #if HAS_ICONV #include #endif diff --git a/DjVuFile/libdjvu/IFFByteStream.h b/DjVuFile/libdjvu/IFFByteStream.h index 26066a39a8..69f3043e38 100644 --- a/DjVuFile/libdjvu/IFFByteStream.h +++ b/DjVuFile/libdjvu/IFFByteStream.h @@ -132,7 +132,7 @@ #include #include #include "GException.h" -#include "GString.h" +#include "DjVuGString.h" #include "ByteStream.h" diff --git a/DjVuFile/libdjvu/JB2Image.h b/DjVuFile/libdjvu/JB2Image.h index 03e4d18173..4b651c46cc 100644 --- a/DjVuFile/libdjvu/JB2Image.h +++ 
b/DjVuFile/libdjvu/JB2Image.h @@ -173,7 +173,7 @@ //@{ -#include "GString.h" +#include "DjVuGString.h" #include "ZPCodec.h" diff --git a/DjVuFile/libdjvu/JPEGDecoder.cpp b/DjVuFile/libdjvu/JPEGDecoder.cpp index 4407c0679e..cf1ca1a917 100644 --- a/DjVuFile/libdjvu/JPEGDecoder.cpp +++ b/DjVuFile/libdjvu/JPEGDecoder.cpp @@ -99,7 +99,7 @@ extern "C" { #include "GPixmap.h" #ifdef LIBJPEGNAME #include "DjVuDynamic.h" -#include "GString.h" +#include "DjVuGString.h" #endif // LIBJPEGNAME diff --git a/DjVuFile/libdjvu/UnicodeByteStream.h b/DjVuFile/libdjvu/UnicodeByteStream.h index 65cadf0a5b..bedd539fd6 100644 --- a/DjVuFile/libdjvu/UnicodeByteStream.h +++ b/DjVuFile/libdjvu/UnicodeByteStream.h @@ -88,7 +88,7 @@ //@{ #include "DjVuGlobal.h" -#include "GString.h" +#include "DjVuGString.h" #include "ByteStream.h" diff --git a/DjVuFile/libdjvu/XMLTags.h b/DjVuFile/libdjvu/XMLTags.h index 0d3acfe428..48336badc9 100644 --- a/DjVuFile/libdjvu/XMLTags.h +++ b/DjVuFile/libdjvu/XMLTags.h @@ -69,7 +69,7 @@ // This is purely Lizardtech stuff. 
#include "GContainer.h" -#include "GString.h" +#include "DjVuGString.h" #ifdef HAVE_NAMESPACES namespace DJVU { diff --git a/DjVuFile/libdjvu/debug.cpp b/DjVuFile/libdjvu/debug.cpp index e26312f4bd..a1b87d2dda 100644 --- a/DjVuFile/libdjvu/debug.cpp +++ b/DjVuFile/libdjvu/debug.cpp @@ -69,8 +69,8 @@ #include "GThreads.h" #include "GContainer.h" -#include "GString.h" -#include "GString.h" +#include "DjVuGString.h" +#include "DjVuGString.h" #include "ByteStream.h" #include "GURL.h" diff --git a/PdfReader/PdfReader.cpp b/PdfReader/PdfReader.cpp index 4e190f9129..345f99eb58 100644 --- a/PdfReader/PdfReader.cpp +++ b/PdfReader/PdfReader.cpp @@ -171,13 +171,21 @@ namespace PdfReader pMeasurerCache->SetStreams(m_pInternal->m_pAppFonts->GetStreams()); m_pInternal->m_pFontManager->SetOwnerCache(pMeasurerCache); pMeasurerCache->SetCacheSize(1); - m_pInternal->m_pGlobalParams->SetFontManager(m_pInternal->m_pFontManager); + ((GlobalParamsAdaptor*)globalParams)->SetFontManager(m_pInternal->m_pFontManager); //------------------------------------------------------ - RELEASEOBJECT(m_pInternal->m_pPDFDocument); - m_pInternal->m_pPDFDocument = new PDFDoc(m_pInternal->m_pGlobalParams, data, length, owner_password, user_password); - m_eError = m_pInternal->m_pPDFDocument ? m_pInternal->m_pPDFDocument->GetErrorCode() : errorMemory; - if (!m_pInternal->m_pPDFDocument || !m_pInternal->m_pPDFDocument->CheckValidation()) + RELEASEOBJECT(m_pInternal->m_pPDFDocument); + m_eError = errNone; + GString* owner_pswd = NSStrings::CreateString(owner_password); + GString* user_pswd = NSStrings::CreateString(user_password); + m_pInternal->m_pPDFDocument = new PDFDoc(data, length, owner_pswd, user_pswd); + + delete owner_pswd; + delete user_pswd; + + m_eError = m_pInternal->m_pPDFDocument ? 
m_pInternal->m_pPDFDocument->getErrorCode() : errMemory; + + if (!m_pInternal->m_pPDFDocument || !m_pInternal->m_pPDFDocument->isOk()) { RELEASEOBJECT(m_pInternal->m_pPDFDocument); return false; @@ -185,7 +193,7 @@ namespace PdfReader m_pInternal->m_pFontList->Clear(); - return (errorNone == m_eError); + return (errNone == m_eError); } void CPdfReader::Close() { diff --git a/PdfReader/PdfReader.h b/PdfReader/PdfReader.h index 0c77393898..dae0313cc0 100644 --- a/PdfReader/PdfReader.h +++ b/PdfReader/PdfReader.h @@ -69,6 +69,8 @@ namespace PdfReader virtual bool LoadFromFile(const std::wstring& file, const std::wstring& options = L"", const std::wstring& owner_password = L"", const std::wstring& user_password = L""); + virtual bool LoadFromMemory(BYTE* data, DWORD length, const std::wstring& options = L"", + const std::wstring& owner_password = L"", const std::wstring& user_password = L""); virtual void Close(); @@ -80,6 +82,7 @@ namespace PdfReader virtual int GetPagesCount(); virtual void GetPageInfo(int nPageIndex, double* pdWidth, double* pdHeight, double* pdDpiX, double* pdDpiY); virtual void DrawPageOnRenderer(IRenderer* pRenderer, int nPageIndex, bool* pBreak); + virtual BYTE* ConvertToPixels(int nPageIndex, int nRasterW, int nRasterH, bool bIsFlip = false); virtual void ConvertToRaster(int nPageIndex, const std::wstring& path, int nImageType, const int nRasterW = -1, const int nRasterH = -1); int GetError(); @@ -94,6 +97,11 @@ namespace PdfReader NSFonts::IFontManager* GetFontManager(); std::wstring ToXml(const std::wstring& wsXmlPath); + #ifdef BUILDING_WASM_MODULE + virtual BYTE* GetStructure(); + virtual BYTE* GetGlyphs(int nPageIndex, int nRasterW, int nRasterH); + virtual BYTE* GetLinks (int nPageIndex, int nRasterW, int nRasterH); + #endif private: CPdfReader_Private* m_pInternal; diff --git a/PdfReader/Src/RendererOutputDev.cpp b/PdfReader/Src/RendererOutputDev.cpp index 9be057f72a..ea3b55f330 100644 --- a/PdfReader/Src/RendererOutputDev.cpp +++ 
b/PdfReader/Src/RendererOutputDev.cpp @@ -55,6 +55,10 @@ #include "../../HtmlRenderer/include/HTMLRenderer3.h" #include "../../PdfWriter/PdfRenderer.h" +#ifdef BUILDING_WASM_MODULE +#include +#include "../../DesktopEditor/graphics/GraphicsRenderer.h" +#endif // TODO: 1. Реализовать по-нормальному градиентные заливки (Axial и Radial) // 2. m_pRenderer->SetAdditionalParam(L"TilingHtmlPattern", oWriter.GetXmlString()); @@ -655,6 +659,22 @@ namespace PdfReader case fontCIDType2OT: wsExt = L".cid_2ot"; break; } + #ifdef BUILDING_WASM_MODULE + std::wstring wsTemp = ((GlobalParamsAdaptor *)globalParams)->GetTempFolder() + L"/x"; + int nTime = (int)time(NULL); + for (int nIndex = 0; nIndex < 1000; ++nIndex) + { + wsTempFileName = wsTemp + std::to_wstring(nTime + nIndex) + wsExt; + if (!CApplicationFontStreams::m_pMemoryStorage->Get(wsTempFileName)) + break; + } + + if (CApplicationFontStreams::m_pMemoryStorage->Get(wsTempFileName)) + { + pEntry->bAvailable = true; + return; + } + #else FILE* pTempFile = NULL; if (!NSFile::CFileBinary::OpenTempFile(&wsTempFileName, &pTempFile, L"wb", (wchar_t*)wsExt.c_str(), (wchar_t*)((GlobalParamsAdaptor *)globalParams)->GetTempFolder().c_str(), NULL)) @@ -665,6 +685,7 @@ namespace PdfReader pEntry->bAvailable = true; return; } + #endif Object oReferenceObject, oStreamObject; oReferenceObject.initRef(oEmbRef.num, oEmbRef.gen); @@ -674,23 +695,50 @@ namespace PdfReader { // Внедренный шрифт неправильно записан oStreamObject.free(); + #ifndef BUILDING_WASM_MODULE fclose(pTempFile); if (L"" != wsTempFileName) NSFile::CFileBinary::Remove(wsTempFileName); + #endif pEntry->bAvailable = true; return; } oStreamObject.streamReset(); + #ifdef BUILDING_WASM_MODULE + LONG nCurrentPos = 0; + LONG nCurrentSize = 0xffff; + BYTE* pTempStream = new BYTE[nCurrentSize]; + int nChar; + while ((nChar = oStreamObject.streamGetChar()) != EOF) + { + if (nCurrentPos >= nCurrentSize) + { + LONG nNewSize = nCurrentSize + 0xffff; + BYTE* NewBuffer = new 
BYTE[nNewSize]; + memcpy(NewBuffer, pTempStream, nCurrentSize); + RELEASEARRAYOBJECTS(pTempStream); + pTempStream = NewBuffer; + nCurrentSize = nNewSize; + } + pTempStream[nCurrentPos++] = nChar; + } + BYTE* pResBuffer = new BYTE[nCurrentPos]; + memcpy(pResBuffer, pTempStream, nCurrentPos); + RELEASEARRAYOBJECTS(pTempStream); + CApplicationFontStreams::m_pMemoryStorage->Add(wsTempFileName, pResBuffer, nCurrentPos, true); + RELEASEARRAYOBJECTS(pResBuffer); + #else int nChar; while ((nChar = oStreamObject.streamGetChar()) != EOF) { fputc(nChar, pTempFile); } + fclose(pTempFile); + #endif oStreamObject.streamClose(); oStreamObject.free(); - fclose(pTempFile); wsFileName = wsTempFileName; // Для шрифтов типа Type1 нужно дописать Afm файл с метриками diff --git a/PdfReader/lib/xpdf/PDFDoc.cc b/PdfReader/lib/xpdf/PDFDoc.cc index 7fad38c92d..fa451e3f64 100644 --- a/PdfReader/lib/xpdf/PDFDoc.cc +++ b/PdfReader/lib/xpdf/PDFDoc.cc @@ -218,6 +218,23 @@ PDFDoc::PDFDoc(char *fileNameA, GString *ownerPassword, ok = setup(ownerPassword, userPassword); } +PDFDoc::PDFDoc(BYTE* data, DWORD length, GString *ownerPassword, + GString *userPassword, PDFCore *coreA) +{ + Object obj; + + init(coreA); + + fileName = NULL; +#ifdef _WIN32 + fileNameU = NULL; +#endif + + obj.initNull(); + str = new MemStream((char*)data, 0, length, &obj); + ok = setup(ownerPassword, userPassword); +} + PDFDoc::PDFDoc(BaseStream *strA, GString *ownerPassword, GString *userPassword, PDFCore *coreA) { #ifdef _WIN32 diff --git a/PdfReader/lib/xpdf/PDFDoc.h b/PdfReader/lib/xpdf/PDFDoc.h index 5069cb7868..cbae6d635f 100644 --- a/PdfReader/lib/xpdf/PDFDoc.h +++ b/PdfReader/lib/xpdf/PDFDoc.h @@ -50,6 +50,8 @@ public: // Windows). 
PDFDoc(char *fileNameA, GString *ownerPassword = NULL, GString *userPassword = NULL, PDFCore *coreA = NULL); + PDFDoc(BYTE* data, DWORD length, GString *ownerPassword = NULL, + GString *userPassword = NULL, PDFCore *coreA = NULL); PDFDoc(BaseStream *strA, GString *ownerPassword = NULL, GString *userPassword = NULL, PDFCore *coreA = NULL);