coherence/lang/String.hpp

00001 /*
00002 * String.hpp
00003 *
00004 * Copyright 2001-2008 by Oracle. All rights reserved.
00005 *
00006 * Oracle is a registered trademarks of Oracle Corporation and/or its
00007 * affiliates.
00008 *
00009 * This software is the confidential and proprietary information of Oracle
00010 * Corporation. You shall not disclose such confidential and proprietary
00011 * information and shall use it only in accordance with the terms of the
00012 * license agreement you entered into with Oracle.
00013 *
00014 * This notice may not be removed or altered.
00015 */
00016 #ifndef COH_STRING_HPP
00017 #define COH_STRING_HPP
00018 
00019 #include "coherence/lang/compatibility.hpp"
00020 
00021 #include "coherence/lang/Array.hpp"
00022 #include "coherence/lang/Comparable.hpp"
00023 #include "coherence/lang/Object.hpp"
00024 
00025 #include <memory>
00026 #include <ostream>
00027 #include <sstream>
00028 #include <string>
00029 
00030 COH_OPEN_NAMESPACE2(coherence,lang)
00031 
00032 /**
00033 * @internal
00034 *
00035 * Shim which provides protected inheritance of Array<octet_t> to String
00036 * (String names this class in its extends<> spec), as spec based class
00037 * definitions don't have a notion of protected inheritance.
00038 */
00039 class COH_EXPORT_SPEC ProtectedOctetArray
00040     : protected Array<octet_t>
00041     {
00042     public:
00043         typedef Array<octet_t>::super super; // re-exported from Array<octet_t>
00044         typedef Array<octet_t>::alias alias; // re-exported from Array<octet_t>
00045 
00046     protected: // every constructor below only forwards to Array<octet_t>
00047         ProtectedOctetArray(size32_t cb)
00048             : Array<octet_t>(cb)
00049             {}
00050 
00051         ProtectedOctetArray(const ProtectedOctetArray& that)
00052             : Array<octet_t>(that)
00053             {}
00054 
00055         ProtectedOctetArray(ProtectedOctetArray::View vThat,
00056             size32_t iFrom, size32_t iTo)
00057             : Array<octet_t>(vThat, iFrom, iTo)
00058             {}
00059 
00060         virtual ~ProtectedOctetArray()
00061             {}
00062     };
00063 
00064 /**
00065 * A managed C-style (NUL terminated) string.
00066 *
00067 * In addition to exposing the underlying char array, the String class
00068 * supports transformations to and from Unicode code points within the Basic
00069 * Multilingual Plane (BMP):
00070 *
00071 * <ul>
00072 * <li>UTF-8  BMP char array</li>
00073 * <li>UTF-16 BMP wchar_t array (on platforms where wchar_t is >= 16 bits)</li>
00074 * <li>UTF-8  BMP octet_t array</li>
00075 * <li>UTF-16 BMP char16_t array</li>
00076 * </ul>
00077 *
00078 * Note: the ASCII character set is a subset of UTF-8 BMP.
00079 *
00080 * Unlike most managed types in the Coherence class hierarchy, Strings are
00081 * auto-boxable by default. That is, a String::Handle or String::View can be
00082 * directly assigned from or to common string representations.  For example
00083 * the following code is legal:
00084 * @code
00085 * String::Handle hs = "hello world";
00086 * @endcode
00087 * as is
00088 * @code
00089 * void someFunction(String::View vs);
00090 *
00091 * someFunction("some value");
00092 * @endcode
00093 *
00094 * @see StringHandle for details
00095 *
00096 * @author mf/jh/djl  2007.07.05
00097 */
00098 class COH_EXPORT String
00099     : public cloneable_spec<String,
00100         extends<ProtectedOctetArray>,
00101         implements<Comparable> >
00102     {
00103     friend class factory<String>;
00104 
00105     // ----- constants ------------------------------------------------------
00106 
00107     public:
00108         /**
00109         * The largest possible value of type size32_t; used by this class as
00110         * the "no index" result of searches and the "to the end" default. */
00111         static const size32_t npos = size32_t(-1);
00112 
00113 
00114     // ----- typedefs -------------------------------------------------------
00115 
00116     public:
00117         /**
00118         * While StringHandle boxes a number of common string types, String is
00119         * still compatible with BoxHandle, and when used with it can box to
00120         * only one type. By default Strings are boxable from a number of
00121         * types, see StringHandle for details.
00122         */
00123         typedef std::string BoxedType;
00124 
00125 
00126     // ----- nested class: StringHandle -------------------------------------
00127 
00128     public:
00129         /**
00130         * StringHandle provides standard TypedHandle features as well as
00131         * auto-boxing support for standard string types including:
00132         *
00133         * <ul>
00134         * <li>char[]       C-style NUL terminated char array</li>
00135         * <li>wchar_t[]    C-style NUL terminated wide char array</li>
00136         * <li>std::string  STL string</li>
00137         * <li>std::wstring STL wide string</li>
00138         * </ul>
00139         *
00140         * Unboxing to char[] and wchar_t[] is not supported as it is unsafe to
00141         * maintain a reference to the underlying character array without
00142         * holding a reference to the String. Unboxing to std::string, and
00143         * std::wstring is both supported and safe.
00144         */
00145         template<class T> class StringHandle
00146             : public TypedHandle<T>
00147             {
00148             // ----- constructors ---------------------------------------
00149 
00150             public:
00151                 /**
00152                 * Create an empty StringHandle.
00153                 */
00154                 StringHandle()
00155                         : TypedHandle<T>()
00156                     {
00157                     }
00158 
00159                 /**
00160                 * Box a C-style char array; NULL leaves the handle empty.
00161                 */
00162                 StringHandle(const char* ach)
00163                         : TypedHandle<T>()
00164                     {
00165                     if (NULL != ach)
00166                         {
00167                         TypedHandle<T>::set(get_pointer(T::create(ach)));
00168                         }
00169                     }
00170 
00171                 /**
00172                 * Box a C-style wide char array; NULL leaves the handle empty.
00173                 */
00174                 StringHandle(const wchar_t* ach)
00175                         : TypedHandle<T>()
00176                     {
00177                     if (NULL != ach)
00178                         {
00179                         TypedHandle<T>::set(get_pointer(T::create(ach)));
00180                         }
00181                     }
00182 
00183                 /**
00184                 * Box an STL string into a newly created T.
00185                 */
00186                 StringHandle(const std::string& s)
00187                         : TypedHandle<T>()
00188                     {
00189                     TypedHandle<T>::set(get_pointer(T::create(s)));
00190                     }
00191 
00192                 /**
00193                 * Box an STL wstring into a newly created T.
00194                 */
00195                 StringHandle(const std::wstring& ws)
00196                         : TypedHandle<T>()
00197                     {
00198                     TypedHandle<T>::set(get_pointer(T::create(ws)));
00199                     }
00200 
00201                 /**
00202                 * Create a new StringHandle from a TypedHandle of another type.
00203                 * Named without <T>: C++20 rejects a template-id constructor.
00204                 */
00205                 template<class O> StringHandle(const TypedHandle<O>& h)
00206                         : TypedHandle<T>()
00207                     {
00208                     TypedHandle<T>::set(get_pointer(h));
00209                     }
00210 
00211                 /**
00212                 * Create a new StringHandle from the raw pointer.
00213                 */
00214                 StringHandle(T* o)
00215                         : TypedHandle<T>()
00216                     {
00217                     TypedHandle<T>::set(o);
00218                     }
00219 
00220                 /**
00221                 * The destructor.
00222                 */
00223                 ~StringHandle()
00224                     {
00225                     }
00226 
00227             // ----- operators ------------------------------------------
00228 
00229             public:
00230                 /**
00231                 * The assignment operator; re-points this handle at h's referent.
00232                 */
00233                 template<class O>
00234                 StringHandle& operator=(const TypedHandle<O>& h)
00235                     {
00236                     TypedHandle<T>::set(get_pointer(h));
00237                     return *this;
00238                     }
00239 
00240                 /**
00241                 * The "boxing" operator; a NULL array resets the handle to NULL.
00242                 */
00243                 StringHandle& operator=(const char* ach)
00244                     {
00245                     if (NULL == ach)
00246                         {
00247                         TypedHandle<T>::set(NULL);
00248                         }
00249                     else
00250                         {
00251                         TypedHandle<T>::set(get_pointer(T::create(ach)));
00252                         }
00253                     return *this;
00254                     }
00255 
00256                 /**
00257                 * The "boxing" operator; a NULL array resets the handle to NULL.
00258                 */
00259                 StringHandle& operator=(const wchar_t* ach)
00260                     {
00261                     if (NULL == ach)
00262                         {
00263                         TypedHandle<T>::set(NULL);
00264                         }
00265                     else
00266                         {
00267                         TypedHandle<T>::set(get_pointer(T::create(ach)));
00268                         }
00269                     return *this;
00270                     }
00271 
00272                 /**
00273                 * The "boxing" operator for an STL string.
00274                 */
00275                 StringHandle& operator=(const std::string& s)
00276                     {
00277                     TypedHandle<T>::set(get_pointer(T::create(s)));
00278                     return *this;
00279                     }
00280 
00281                 /**
00282                 * The "boxing" operator for an STL wstring.
00283                 */
00284                 StringHandle& operator=(const std::wstring& ws)
00285                     {
00286                     TypedHandle<T>::set(get_pointer(T::create(ws)));
00287                     return *this;
00288                     }
00289 
00290                 /**
00291                 * The "unboxing" operator.
00292                 * Calls coh_throw_npe if this handle references NULL.
00293                 * @return a copy of the referenced Object
00294                 */
00295                 operator std::string() const
00296                     {
00297                     const T* pT = TypedHandle<T>::get();
00298                     if (NULL == pT)
00299                         {
00300                         coh_throw_npe(typeid(T));
00301                         }
00302                     return (std::string) *pT;
00303                     }
00304 
00305                 /**
00306                 * The "unboxing" operator.
00307                 * Calls coh_throw_npe if this handle references NULL.
00308                 * @return a copy of the referenced Object
00309                 */
00310                 operator std::wstring() const
00311                     {
00312                     const T* pT = TypedHandle<T>::get();
00313                     if (NULL == pT)
00314                         {
00315                         coh_throw_npe(typeid(T));
00316                         }
00317                     return (std::wstring) *pT;
00318                     }
00319 
00320                 /**
00321                 * The equality operator; compares referent addresses only.
00322                 */
00323                 template<class O>
00324                 bool operator==(const TypedHandle<O>& h) const
00325                     {
00326                     return ((const Object*) get_pointer(*this)) ==
00327                            ((const Object*) get_pointer(h));
00328                     }
00329 
00330                 /**
00331                 * The equality operator; compares referent addresses only.
00332                 */
00333                 bool operator==(const Object* cpo) const
00334                     {
00335                     return ((const Object*) get_pointer(*this)) ==
00336                            ((const Object*) cpo);
00337                     }
00338 
00339                 /**
00340                 * The inequality operator; the negation of operator==.
00341                 */
00342                 template<class O>
00343                 bool operator!=(const TypedHandle<O>& h) const
00344                     {
00345                     return !operator==(h);
00346                     }
00347 
00348                 /**
00349                 * The inequality operator; the negation of operator==.
00350                 */
00351                 bool operator!=(const Object* cpo) const
00352                     {
00353                     return !operator==(cpo);
00354                     }
00355             };
00356 
00357     // ----- handle definitions ---------------------------------------------
00358 
00359     public:
00360         /**
00361         * Handle definition: a StringHandle referencing a non-const String.
00362         */
00363         typedef StringHandle<String> Handle;
00364 
00365         /**
00366         * View definition: a StringHandle referencing a const String.
00367         */
00368         typedef StringHandle<const String> View;
00369 
00370 
00371     // ----- constructors ---------------------------------------------------
00372 
00373     private:
00374         /**
00375         * Create a String from a C-style NUL terminated char array.
00376         *
00377         * @param achSrc  the NUL terminated string of chars to copy
00378         *
00379         * @throws IllegalArgumentException if any of the elements in the
00380         *         array are not UTF-8 BMP
00381         */
00382         String(const char* achSrc = "");
00383 
00384         /**
00385         * Create a String from a C-style NUL terminated wide char array.
00386         *
00387         * @param achSrc  the NUL terminated string of wide chars to copy
00388         *
00389         * @throws IllegalArgumentException if any of the elements in the
00390         *         array are not UTF-16 BMP
00391         */
00392         String(const wchar_t* achSrc);
00393 
00394         /**
00395         * Create a String from an STL string.
00396         *
00397         * @param s  the STL string to copy
00398         *
00399         * @throws IllegalArgumentException if any of the elements in the
00400         *         string are not UTF-8 BMP
00401         */
00402         String(const std::string& s);
00403 
00404         /**
00405         * Create a String from an STL wstring.
00406         *
00407         * @param ws  the STL wstring to copy
00408         *
00409         * @throws IllegalArgumentException if any of the elements in the
00410         *         wstring are not UTF-16 BMP
00411         */
00412         String(const std::wstring& ws);
00413 
00414         /**
00415         * Create a String from a char array.
00416         *
00417         * @param vachSrc  the array of chars to copy
00418         * @param of       the offset at which to start copying
00419         * @param cch      the number of chars to copy; if npos, copy all
00420         *                 subsequent chars in the array
00421         *
00422         * @throws IndexOutOfBoundsException if of > vachSrc->length or if
00423         *         cch < npos and of + cch > vachSrc->length
00424         * @throws IllegalArgumentException if any of the elements in the
00425         *         array are not UTF-8 BMP
00426         */
00427         String(Array<char>::View vachSrc, size32_t of = 0, size32_t cch = npos);
00428 
00429         /**
00430         * Create a String from a wide char array.
00431         *
00432         * @param vachSrc  the array of wide chars to copy
00433         * @param of       the offset at which to start copying
00434         * @param cch      the number of chars to copy; if npos, copy all
00435         *                 subsequent chars in the array
00436         *
00437         * @throws IndexOutOfBoundsException if of > vachSrc->length or if
00438         *         cch < npos and of + cch > vachSrc->length
00439         * @throws IllegalArgumentException if any of the elements in the
00440         *         array are not UTF-16 BMP
00441         * @throws UnsupportedOperationException if sizeof(wchar_t) <
00442         *         sizeof(char16_t)
00443         */
00444         String(Array<wchar_t>::View vachSrc, size32_t of = 0, size32_t cch = npos);
00445 
00446         /**
00447         * Create a String from an octet array.
00448         *
00449         * @param vabSrc  the array of octets to copy
00450         * @param of      the offset at which to start copying
00451         * @param cb      the number of octets to copy; if npos, copy all
00452         *                subsequent octets in the array
00453         *
00454         * @throws IndexOutOfBoundsException if of > vabSrc->length or if
00455         *         cb < npos and of + cb > vabSrc->length
00456         * @throws IllegalArgumentException if any of the elements in the
00457         *         array are not UTF-8 BMP
00458         */
00459         String(Array<octet_t>::View vabSrc, size32_t of = 0, size32_t cb = npos);
00460 
00461         /**
00462         * Create a String from a 16-bit char array.
00463         *
00464         * @param vachSrc  the array of 16-bit chars to copy
00465         * @param of       the offset at which to start copying
00466         * @param cch      the number of chars to copy; if npos, copy all
00467         *                 subsequent chars in the array
00468         *
00469         * @throws IndexOutOfBoundsException if of > vachSrc->length or if
00470         *         cch < npos and of + cch > vachSrc->length
00471         * @throws IllegalArgumentException if any of the elements in the
00472         *         array are not UTF-16 BMP
00473         */
00474         String(Array<char16_t>::View vachSrc, size32_t of = 0, size32_t cch = npos);
00475 
00476         /**
00477         * Copy constructor.
00478         */
00479         String(const String& that);
00480 
00481 
00482     // ----- String interface -----------------------------------------------
00483 
00484     public:
00485         /**
00486         * Return true iff the String contains only ASCII (ISO-8859-1)
00487         * characters. In this case each character is represented by a single
00488         * char, otherwise a character can take between one and three chars.
00489         *
00490         * @return true iff the String contains only ASCII characters
00491         */
00492         virtual bool isASCII() const;
00493 
00494         /**
00495         * Return the number of unicode code points (characters) in this String.
00496         *
00497         * @return the number of characters in this String
00498         */
00499         virtual size32_t length() const;
00500 
00501         /**
00502         * Return the String as a C-style NUL terminated char array.
00503         *
00504         * If the String is non-ASCII then the String::next() method may be
00505         * used to expand the char array into a sequence of char16_t unicode
00506         * characters.
00507         *
00508         * The returned array's lifetime is bound to the lifetime of the
00509         * String which it was returned from. Specifically it is unsafe to use
00510         * the returned char* while not holding a handle to the String.
00511         *
00512         * @return the char array representing the String.
00513         */
00514         virtual const char* getCString() const;
00515 
00516         /**
00517         * Compare this String against the supplied C-style string.
00518         *
00519         * @param ach  the NUL terminated C-style string to compare to this
00520         *             String
00521         *
00522         * @return true iff the two strings are identical
00523         */
00524         virtual bool equals(const char* ach) const;
00525 
00526         /**
00527         * Compare this String against the supplied C-style wide char string.
00528         *
00529         * @param ach  the NUL terminated C-style wide char string to compare
00530         *             to this String
00531         *
00532         * @return true iff the two strings are identical
00533         *
00534         * @throws UnsupportedOperationException if sizeof(wchar_t) < sizeof(char16_t)
00535         */
00536         virtual bool equals(const wchar_t* ach) const;
00537 
00538         /**
00539         * Compare this String against the supplied STL string.
00540         *
00541         * @param s  the STL string to compare to this String
00542         *
00543         * @return true iff the two strings are identical
00544         */
00545         virtual bool equalsStd(const std::string& s) const;
00546 
00547         /**
00548         * Compare this String against the supplied STL wstring.
00549         *
00550         * @param ws  the STL wstring to compare to this String
00551         *
00552         * @return true iff the two strings are identical
00553         *
00554         * @throws UnsupportedOperationException if sizeof(wchar_t) < sizeof(char16_t)
00555         */
00556         virtual bool equalsStd(const std::wstring& ws) const;
00557 
00558         /**
00559         * Return the index of a substring within this String.
00560         *
00561         * @param vsSearch  the substring to search for in this String
00562         * @param iBegin    the location in this String to start searching
00563         *
00564         * @return the index of the substring found within this String or npos
00565         */
00566         virtual size32_t indexOf(String::View vsSearch,
00567                 size32_t iBegin = 0) const;
00568 
00569         /**
00570         * Return the index of a character within this String.
00571         *
00572         * @param chSearch  the character to search for in this String
00573         * @param iBegin    the location in this String to start searching
00574         *
00575         * @return the index of the character found within this String or npos
00576         */
00577         virtual size32_t indexOf(char16_t chSearch,
00578                 size32_t iBegin = 0) const;
00579 
00580         /**
00581         * Return the index of a substring within this String by searching
00582         * backward from the given beginning index.
00583         *
00584         * @param vsSearch  the substring to search for within this String
00585         * @param iBegin    the location in this String to start searching
00586         *
00587         * @return the index of the substring found within this String or npos
00588         */
00589         virtual size32_t lastIndexOf(String::View vsSearch,
00590                 size32_t iBegin = npos) const;
00591 
00592         /**
00593         * Return the index of a character within this String by searching
00594         * backward from the given beginning index.
00595         *
00596         * @param chSearch  the character to search for in this String
00597         * @param iBegin    the location in this String to start searching
00598         *
00599         * @return the index of the character found within this String or npos
00600         */
00601         virtual size32_t lastIndexOf(char16_t chSearch,
00602                 size32_t iBegin = npos) const;
00603 
00604         /**
00605         * Return a new String comprised of the substring of this string
00606         * from iBegin (inclusive) to iEnd (exclusive).
00607         *
00608         * @param iBegin    the starting index from which to create the string
00609         * @param iEnd      the index of where the substring should stop
00610         *                  in this String or npos for end of string
00611         *
00612         * @return the new substring created from this String
00613         */
00614         virtual String::View substring(size32_t iBegin,
00615                 size32_t iEnd = npos) const;
00616 
00617         /**
00618         * Return true if this String starts with the supplied String.
00619         *
00620         * @param vsSearch  the string to search for
00621         *
00622         * @return true if this String starts with vsSearch
00623         */
00624         virtual bool startsWith(String::View vsSearch) const;
00625 
00626         /**
00627         * Return true if this String ends with the supplied String.
00628         *
00629         * @param vsSearch  the string to search for
00630         *
00631         * @return true if this String ends with vsSearch
00632         */
00633         virtual bool endsWith(String::View vsSearch) const;
00634 
00635         /**
00636         * A substring of this String is compared to a substring of a supplied
00637         * String.
00638         *
00639         * @param ofSource  the offset in this String where comparison begins
00640         * @param vsOther   the String whose substring is compared against
00641         *                  this String
00642         * @param ofOther   the offset in vsOther where comparison begins
00643         * @param cch       the count of characters to compare (default npos)
00644         *
00645         * @return the result of comparing the two substrings
00646         */
00647         virtual bool regionMatches(size32_t ofSource,
00648                 String::View vsOther, size32_t ofOther = 0,
00649                 size32_t cch = npos) const;
00650 
00651         /**
00652         * Return a String that is the result of removing all leading and
00653         * trailing white space.
00654         *
00655         * @return a trimmed copy of this String
00656         */
00657         String::View trim() const;
00658 
00659         /**
00660         * Return the underlying UTF-8 BMP NUL terminated Array<octet_t>.
00661         *
00662         * For performance reasons the returned Array may not support cloning.
00663         * If clone() is called the result will be a String, which depending on
00664         * the compiler's handling of dynamic_cast to a protected super class
00665         * may fail to be castable to an Array<octet_t>.
00666         *
00667         * @return the Array<octet_t>
00668         */
00669         virtual Array<octet_t>::View getOctets() const;
00670 
00671         /**
00672         * Return an STL string containing the same contents as this String.
00673         *
00674         * @return a new STL string.
00675         */
00676         virtual operator std::string() const;
00677 
00678         /**
00679         * Return an STL wstring containing the same contents as this String.
00680         *
00681         * @return a new STL wstring.
00682         *
00683         * @throws UnsupportedOperationException if sizeof(wchar_t) < sizeof(char16_t)
00684         */
00685         virtual operator std::wstring() const;
00686 
00687 
00688     // ----- Comparable interface -------------------------------------------
00689 
00690     public:
00691         /**
00692         * {@inheritDoc}
00693         */
00694         virtual int32_t compareTo(Object::View v) const;
00695 
00696 
00697     // ----- Object interface -----------------------------------------------
00698 
00699     public:
00700         /**
00701         * {@inheritDoc}
00702         */
00703         virtual size32_t hashCode() const;
00704 
00705         /**
00706         * {@inheritDoc}
00707         */
00708         virtual void toStream(std::ostream& out) const;
00709 
00710         /**
00711         * {@inheritDoc}
00712         */
00713         virtual bool isImmutable() const;
00714 
00715         /**
00716         * {@inheritDoc}
00717         */
00718         virtual bool equals(Object::View v) const;
00719 
00720         /**
00721         * {@inheritDoc}
00722         */
00723         virtual size32_t sizeOf() const;
00724 
00725 
00726     // ----- static helpers -------------------------------------------------
00727 
00728     public:
00729         /**
00730         * Return the Unicode character as UTF-16 from the char array, and
00731         * increment the pointer such that it references the start of the
00732         * next Unicode character.
00733         *
00734         * @param ach  in/out pointer to the start of the next UTF-8 code point
00735         *
00736         * @return the next Unicode character
00737         *
00738         * @throws IllegalArgumentException  if a non UTF-8 BMP sequence is
00739         *                                   encountered
00740         */
00741         static char16_t next(const char*& ach);
00742 
00743 
00744     // ----- data members ---------------------------------------------------
00745 
00746     protected:
00747         /**
00748         * The number of unicode code points (characters) in the String.
00749         */
00750         size32_t m_ccp;
00751 
00752 
00753     // ----- constants ------------------------------------------------------
00754 
00755     public:
00756         /**
00757         * String referencing NULL.
00758         */
00759         static const String::Handle NULL_STRING;
00760     };
00761 
00762 
00763 // ----- helper macros ------------------------------------------------------
00764 
00765 /**
00766 * This macro will take any set of streamable contents and turn them into a
00767 * coherence#lang#String instance. The temporary stream is passed through
00768 * flush() to obtain an lvalue, so non-member operator<< overloads apply.
00769 * @param CONTENTS  the contents to use in constructing the String.
00770 *
00771 * Usage example:
00772 * @code
00773 * String::Handle hsFoo = COH_TO_STRING("This value: " << 5 << " is my value");
00774 * @endcode
00775 */
00776 #define COH_TO_STRING(CONTENTS) \
00777     coherence::lang::String::create( \
00778             static_cast<std::stringstream&>( \
00779                 std::stringstream().flush() << CONTENTS).str())
00780 
00781 COH_CLOSE_NAMESPACE2
00782 
00783 #endif // COH_STRING_HPP
Copyright (c) 2000-2008 Oracle. All rights reserved.