00001 /* 00002 * String.hpp 00003 * 00004 * Copyright 2001-2008 by Oracle. All rights reserved. 00005 * 00006 * Oracle is a registered trademarks of Oracle Corporation and/or its 00007 * affiliates. 00008 * 00009 * This software is the confidential and proprietary information of Oracle 00010 * Corporation. You shall not disclose such confidential and proprietary 00011 * information and shall use it only in accordance with the terms of the 00012 * license agreement you entered into with Oracle. 00013 * 00014 * This notice may not be removed or altered. 00015 */ 00016 #ifndef COH_STRING_HPP 00017 #define COH_STRING_HPP 00018 00019 #include "coherence/lang/compatibility.hpp" 00020 00021 #include "coherence/lang/Array.hpp" 00022 #include "coherence/lang/Comparable.hpp" 00023 #include "coherence/lang/Object.hpp" 00024 00025 #include <memory> 00026 #include <ostream> 00027 #include <sstream> 00028 #include <string> 00029 00030 COH_OPEN_NAMESPACE2(coherence,lang) 00031 00032 /** 00033 * @internal 00034 * 00035 * Used to proced protected inheritance of Array<octet_t> by String, as 00036 * spec based class definitions don't have a notion of protected 00037 * inheritance. 00038 */ 00039 class COH_EXPORT_SPEC ProtectedOctetArray 00040 : protected Array<octet_t> 00041 { 00042 public: 00043 typedef Array<octet_t>::super super; 00044 typedef Array<octet_t>::alias alias; 00045 00046 protected: 00047 ProtectedOctetArray(size32_t cb) 00048 : Array<octet_t>(cb) 00049 {} 00050 00051 ProtectedOctetArray(const ProtectedOctetArray& that) 00052 : Array<octet_t>(that) 00053 {} 00054 00055 ProtectedOctetArray(ProtectedOctetArray::View vThat, 00056 size32_t iFrom, size32_t iTo) 00057 : Array<octet_t>(vThat, iFrom, iTo) 00058 {} 00059 00060 virtual ~ProtectedOctetArray() 00061 {} 00062 }; 00063 00064 /** 00065 * A managed C-style (NUL terminated) string. 00066 * 00067 * In addition to exposing the underlying char array, the String class 00068 * supports tranformations to and from Unicode code points within the Basic 00069 * Multilingual Plane (BMP): 00070 * 00071 * <ul> 00072 * <li>UTF-8 BMP char array</li> 00073 * <li>UTF-16 BMP wchar_t array (on platforms where wchar_t is >= 16 bits)</li> 00074 * <li>UTF-8 BMP octet_t array</li> 00075 * <li>UTF-16 BMP char16_t array</li> 00076 * </ul> 00077 * 00078 * Note: the ASCII character set is a subset of UTF-8 BMP. 00079 * 00080 * Unlike most managed types in the Coherence class hierarchy, Strings are 00081 * auto-boxable by default. That is a String::Handle or String::View can be 00082 * directly asssigned from or to common string representations. For example 00083 * the following code is legal: 00084 * @code 00085 * String::Handle hs = "hello world"; 00086 * @endcode 00087 * as is 00088 * @code 00089 * void someFunction(String::View vs); 00090 * 00091 * someFunction("some value"); 00092 * @endcode 00093 * 00094 * @see StringHandle for details 00095 * 00096 * @author mf/jh/djl 2007.07.05 00097 */ 00098 class COH_EXPORT String 00099 : public cloneable_spec<String, 00100 extends<ProtectedOctetArray>, 00101 implements<Comparable> > 00102 { 00103 friend class factory<String>; 00104 00105 // ----- constants ------------------------------------------------------ 00106 00107 public: 00108 /** 00109 * The largest possible value of type size32_t. 00110 */ 00111 static const size32_t npos = size32_t(-1); 00112 00113 00114 // ----- typedefs ------------------------------------------------------- 00115 00116 public: 00117 /** 00118 * While StringHandle boxes a number of common string types, String is 00119 * still compatible with BoxHandle, and when used with it can box to 00120 * only one type. By default Strings are boxable from a number of 00121 * types, see StringHandle for details. 00122 */ 00123 typedef std::string BoxedType; 00124 00125 00126 // ----- nested class: StringHandle ------------------------------------- 00127 00128 public: 00129 /** 00130 * StringHandle provides standard TypedHandle feaures as well as 00131 * auto-boxing support for standard string types including: 00132 * 00133 * <ul> 00134 * <li>char[] C-style NUL terminated char array</li> 00135 * <li>wchar_t[] C-style NUL terminated wide char array</li> 00136 * <li>std::string STL string</li> 00137 * <li>std::wstring STL wide string</li> 00138 * </ul> 00139 * 00140 * Unboxing to char[] and wchar[] is not supported as it is unsafe to 00141 * maintain a reference to the underlying character array without 00142 * holding a reference to the String. Unboxing to std::string, and 00143 * std::wstring is both supported and safe. 00144 */ 00145 template<class T> class StringHandle 00146 : public TypedHandle<T> 00147 { 00148 // ----- constructors --------------------------------------- 00149 00150 public: 00151 /** 00152 * Create an empty StringHandle. 00153 */ 00154 StringHandle() 00155 : TypedHandle<T>() 00156 { 00157 } 00158 00159 /** 00160 * Create a new StringHandle from a boxable type. 00161 */ 00162 StringHandle(const char* ach) 00163 : TypedHandle<T>() 00164 { 00165 if (NULL != ach) 00166 { 00167 TypedHandle<T>::set(get_pointer(T::create(ach))); 00168 } 00169 } 00170 00171 /** 00172 * Create a new StringHandle from a boxable type. 00173 */ 00174 StringHandle(const wchar_t* ach) 00175 : TypedHandle<T>() 00176 { 00177 if (NULL != ach) 00178 { 00179 TypedHandle<T>::set(get_pointer(T::create(ach))); 00180 } 00181 } 00182 00183 /** 00184 * Create a new StringHandle from a boxable type. 00185 */ 00186 StringHandle(const std::string& s) 00187 : TypedHandle<T>() 00188 { 00189 TypedHandle<T>::set(get_pointer(T::create(s))); 00190 } 00191 00192 /** 00193 * Create a new StringHandle from a boxable type. 00194 */ 00195 StringHandle(const std::wstring& ws) 00196 : TypedHandle<T>() 00197 { 00198 TypedHandle<T>::set(get_pointer(T::create(ws))); 00199 } 00200 00201 /** 00202 * Create a new StringHandle from the TypedHandle with a type 00203 * conversion. 00204 */ 00205 template<class O> StringHandle<T>(const TypedHandle<O>& h) 00206 : TypedHandle<T>() 00207 { 00208 TypedHandle<T>::set(get_pointer(h)); 00209 } 00210 00211 /** 00212 * Create a new StringHandle from the raw pointer. 00213 */ 00214 StringHandle(T* o) 00215 : TypedHandle<T>() 00216 { 00217 TypedHandle<T>::set(o); 00218 } 00219 00220 /** 00221 * The destructor. 00222 */ 00223 ~StringHandle() 00224 { 00225 } 00226 00227 // ----- operators ------------------------------------------ 00228 00229 public: 00230 /** 00231 * The assignment operator. 00232 */ 00233 template<class O> 00234 StringHandle& operator=(const TypedHandle<O>& h) 00235 { 00236 TypedHandle<T>::set(get_pointer(h)); 00237 return *this; 00238 } 00239 00240 /** 00241 * The "boxing" operator. 00242 */ 00243 StringHandle& operator=(const char* ach) 00244 { 00245 if (NULL == ach) 00246 { 00247 TypedHandle<T>::set(NULL); 00248 } 00249 else 00250 { 00251 TypedHandle<T>::set(get_pointer(T::create(ach))); 00252 } 00253 return *this; 00254 } 00255 00256 /** 00257 * The "boxing" operator. 00258 */ 00259 StringHandle& operator=(const wchar_t* ach) 00260 { 00261 if (NULL == ach) 00262 { 00263 TypedHandle<T>::set(NULL); 00264 } 00265 else 00266 { 00267 TypedHandle<T>::set(get_pointer(T::create(ach))); 00268 } 00269 return *this; 00270 } 00271 00272 /** 00273 * The "boxing" operator. 00274 */ 00275 StringHandle& operator=(const std::string& s) 00276 { 00277 TypedHandle<T>::set(get_pointer(T::create(s))); 00278 return *this; 00279 } 00280 00281 /** 00282 * The "boxing" operator. 00283 */ 00284 StringHandle& operator=(const std::wstring& ws) 00285 { 00286 TypedHandle<T>::set(get_pointer(T::create(ws))); 00287 return *this; 00288 } 00289 00290 /** 00291 * The "unboxing" operator. 00292 * 00293 * @return a copy of the referenced Object 00294 */ 00295 operator std::string() const 00296 { 00297 const T* pT = TypedHandle<T>::get(); 00298 if (NULL == pT) 00299 { 00300 coh_throw_npe(typeid(T)); 00301 } 00302 return (std::string) *pT; 00303 } 00304 00305 /** 00306 * The "unboxing" operator. 00307 * 00308 * @return a copy of the referenced Object 00309 */ 00310 operator std::wstring() const 00311 { 00312 const T* pT = TypedHandle<T>::get(); 00313 if (NULL == pT) 00314 { 00315 coh_throw_npe(typeid(T)); 00316 } 00317 return (std::wstring) *pT; 00318 } 00319 00320 /** 00321 * The equality operator. 00322 */ 00323 template<class O> 00324 bool operator==(const TypedHandle<O>& h) 00325 { 00326 return ((const Object*) get_pointer(*this)) == 00327 ((const Object*) get_pointer(h)); 00328 } 00329 00330 /** 00331 * The equality operator. 00332 */ 00333 bool operator==(const Object* cpo) const 00334 { 00335 return ((const Object*) get_pointer(*this)) == 00336 ((const Object*) cpo); 00337 } 00338 00339 /** 00340 * The inequality operator. 00341 */ 00342 template<class O> 00343 bool operator!=(const TypedHandle<O>& h) 00344 { 00345 return !operator==(h); 00346 } 00347 00348 /** 00349 * The inequality operator. 00350 */ 00351 bool operator!=(const Object* cpo) const 00352 { 00353 return !operator==(cpo); 00354 } 00355 }; 00356 00357 // ----- handle definitions --------------------------------------------- 00358 00359 public: 00360 /** 00361 * Handle definition. 00362 */ 00363 typedef StringHandle<String> Handle; 00364 00365 /** 00366 * View definition. 00367 */ 00368 typedef StringHandle<const String> View; 00369 00370 00371 // ----- constructors --------------------------------------------------- 00372 00373 private: 00374 /** 00375 * Create a String from a C-style NUL terminated char array. 00376 * 00377 * @param ach the NUL terminated string of chars to copy 00378 * 00379 * @throws IllegalArgumentException if any of the elements in the 00380 * array are not UTF-8 BMP 00381 */ 00382 String(const char* achSrc = ""); 00383 00384 /** 00385 * Create a String from a C-style NUL terminated wide char array. 00386 * 00387 * @param ach the NUL terminated string of wide chars to copy 00388 * 00389 * @throws IllegalArgumentException if any of the elements in the 00390 * array are not UTF-16 BMP 00391 */ 00392 String(const wchar_t* achSrc); 00393 00394 /** 00395 * Create a String from an STL string. 00396 * 00397 * @param s the STL string to copy 00398 * 00399 * @throws IllegalArgumentException if any of the elements in the 00400 * array are not UTF-8 BMP 00401 */ 00402 String(const std::string& s); 00403 00404 /** 00405 * Create a String from an STL wstring. 00406 * 00407 * @param ws the STL wstring to copy 00408 * 00409 * @throws IllegalArgumentException if any of the elements in the 00410 * array are not UTF-16 BMP 00411 */ 00412 String(const std::wstring& ws); 00413 00414 /** 00415 * Create a String from a char array. 00416 * 00417 * @param vach the array of chars to copy 00418 * @param of the offset at which to start copying 00419 * @param cch the number of chars to copy; if npos, copy all 00420 * subsequent chars in the array 00421 * 00422 * @throws IndexOutOfBoundsException if of > vach->length or if 00423 * cch < npos and of + cch > vach->length 00424 * @throws IllegalArgumentException if any of the elements in the 00425 * array are not UTF-8 BMP 00426 */ 00427 String(Array<char>::View vachSrc, size32_t of = 0, size32_t cch = npos); 00428 00429 /** 00430 * Create a String from a wide char array. 00431 * 00432 * @param vach the array of chars to copy 00433 * @param of the offset at which to start copying 00434 * @param cch the number of chars to copy; if npos, copy all 00435 * subsequent chars in the array 00436 * 00437 * @throws IndexOutOfBoundsException if of > vach->length or if 00438 * cch < npos and of + cch > vach->length 00439 * @throws IllegalArgumentException if any of the elements in the 00440 * array are not UTF-16 BMP 00441 * @throws UnsupportedOperationException if sizeof(wchar_t) < 00442 * sizeof(char16_t) 00443 */ 00444 String(Array<wchar_t>::View vachSrc, size32_t of = 0, size32_t cch = npos); 00445 00446 /** 00447 * Create a String from an octet array. 00448 * 00449 * @param vab the array of octets to copy 00450 * @param of the offset at which to start copying 00451 * @param cb the number of octets to copy; if npos, copy all 00452 * subsequent octets in the array 00453 * 00454 * @throws IndexOutOfBoundsException if of > vab->length or if 00455 * cb < npos and of + cb > vab->length 00456 * @throws IllegalArgumentException if any of the elements in the 00457 * array are not UTF-8 BMP 00458 */ 00459 String(Array<octet_t>::View vabSrc, size32_t of = 0, size32_t cb = npos); 00460 00461 /** 00462 * Create a String from a 16-bit char array. 00463 * 00464 * @param vach the array of chars to copy 00465 * @param of the offset at which to start copying 00466 * @param cch the number of chars to copy; if npos, copy all 00467 * subsequent chars in the array 00468 * 00469 * @throws IndexOutOfBoundsException if of > vach->length or if 00470 * cch < npos and of + cch > vach->length 00471 * @throws IllegalArgumentException if any of the elements in the 00472 * array are not UTF-16 BMP 00473 */ 00474 String(Array<char16_t>::View vachSrc, size32_t of = 0, size32_t cch = npos); 00475 00476 /** 00477 * Copy constructor. 00478 */ 00479 String(const String& that); 00480 00481 00482 // ----- String interface ----------------------------------------------- 00483 00484 public: 00485 /** 00486 * Return true iff the String contains only ASCII (ISO-8859-1) 00487 * characters. In this case each character is represented by a single 00488 * char, otherwise a character can take between one and three chars. 00489 * 00490 * @return true iff the String contains only ASCII characters 00491 */ 00492 virtual bool isASCII() const; 00493 00494 /** 00495 * Return the number of unicode code points (characters) in this String. 00496 * 00497 * @return the number of characters in this String 00498 */ 00499 virtual size32_t length() const; 00500 00501 /** 00502 * Return the String as a C-style NUL terminated char array. 00503 * 00504 * If the String is non-ASCII then the String::next() method may be 00505 * used to expand the char array into a sequence of char16_t unicode 00506 * characters. 00507 * 00508 * The returned array's lifetime is bound to the lifetime of the 00509 * String which it was returned from. Specifically it is unsafe to use 00510 * the returned char* while not holding a handle to the String. 00511 * 00512 * @return the char array representing the String. 00513 */ 00514 virtual const char* getCString() const; 00515 00516 /** 00517 * Compare this String against the supplied C-style string. 00518 * 00519 * @param ach the NUL terminated C-style string to compare to this 00520 * String 00521 * 00522 * @return true iff the two strings are identical 00523 */ 00524 virtual bool equals(const char* ach) const; 00525 00526 /** 00527 * Compare this String against the supplied C-style wide char string. 00528 * 00529 * @param ach the NUL terminated C-style string to compare to this 00530 * String 00531 * 00532 * @return true iff the two strings are identical 00533 * 00534 * @throws UnsupportedOperationException if sizeof(wchar_t) < sizeof(char16_t) 00535 */ 00536 virtual bool equals(const wchar_t* ach) const; 00537 00538 /** 00539 * Compare this String against the supplied STL string. 00540 * 00541 * @param s the STL string to compare to this String 00542 * 00543 * @return true iff the two strings are identical 00544 */ 00545 virtual bool equalsStd(const std::string& s) const; 00546 00547 /** 00548 * Compare this String against the supplied STL wstring. 00549 * 00550 * @param ws the STL wstring to compare to this String 00551 * 00552 * @return true iff the two strings are identical 00553 * 00554 * @throws UnsupportedOperationException if sizeof(wchar_t) < sizeof(char16_t) 00555 */ 00556 virtual bool equalsStd(const std::wstring& ws) const; 00557 00558 /** 00559 * Return the index of a substring within this String. 00560 * 00561 * @param vsSearch the substring to search for in vsSource 00562 * @param iBegin the location in the string to start searching 00563 * 00564 * @return the index of the substring found within this String or npos 00565 */ 00566 virtual size32_t indexOf(String::View vsSearch, 00567 size32_t iBegin = 0) const; 00568 00569 /** 00570 * Return the index of a character within this String. 00571 * 00572 * @param chSearch the character to search for in this String 00573 * @param iBegin the location in this String to start searching 00574 * 00575 * @return the index of the character found within this String or npos 00576 */ 00577 virtual size32_t indexOf(char16_t chSearch, 00578 size32_t iBegin = 0) const; 00579 00580 /** 00581 * Return the index of a substring within this String by searching 00582 * backward from the given beginning index. 00583 * 00584 * @param vsSearh the substring to search for within this String 00585 * @param iBegin the location in this String to start searching 00586 * 00587 * @return the index of the substring found within this String or npos 00588 */ 00589 virtual size32_t lastIndexOf(String::View vsSearch, 00590 size32_t iBegin = npos) const; 00591 00592 /** 00593 * Return the index of a substring within this String by searching 00594 * backward from the given beginning index. 00595 * 00596 * @param chSearch the character to search for in this String 00597 * @param iBegin the location in this String to start searching 00598 * 00599 * @return the index of the character found within this String or npos 00600 */ 00601 virtual size32_t lastIndexOf(char16_t chSearch, 00602 size32_t iBegin = npos) const; 00603 00604 /** 00605 * Return a new String comprised of the substring of this string 00606 * from iBegin (inclusive) to iEnd (exclusive). 00607 * 00608 * @param iBegin the starting index from which to create the string 00609 * @param iEnd the index of where the substring should stop 00610 * in this String or npos for end of string 00611 * 00612 * @return the new substring created from this String 00613 */ 00614 virtual String::View substring(size32_t iBegin, 00615 size32_t iEnd = npos) const; 00616 00617 /** 00618 * Return true if this String starts with the supplied String. 00619 * 00620 * @param vsSearch the string to search for 00621 * 00622 * @return true if this String starts with vsSearch 00623 */ 00624 virtual bool startsWith(String::View vsSearch) const; 00625 00626 /** 00627 * Return true if this String ends with the supplied Strng. 00628 * 00629 * @param vsSearch the string to search for 00630 * 00631 * @return true if this String ends with vsSearch 00632 */ 00633 virtual bool endsWith(String::View vsSearch) const; 00634 00635 /** 00636 * A substring of this String is compared to a substring of a supplied 00637 * String. 00638 * 00639 * @param ofSource the offset in this String where comparison begins 00640 * @param vsOther the String whose substring is compared against 00641 * this String 00642 * @param ofOther the offset in vsOther where comparison begins 00643 * @param cch the count of characters to compare 00644 * 00645 * @return the result of the two substrings 00646 */ 00647 virtual bool regionMatches(size32_t ofSourse, 00648 String::View vsOther, size32_t ofOther = 0, 00649 size32_t cch = npos) const; 00650 00651 /** 00652 * Return a String that is the result of removing all leading and 00653 * trailing white space. 00654 * 00655 * @return a trimmed copy of this String 00656 */ 00657 String::View trim() const; 00658 00659 /** 00660 * Return the unerlying UTF-8 BMP NUL terminated Array<octet_t>. 00661 * 00662 * For performance reasons the returned Array may not support cloneing. 00663 * If clone() is called the result will a String, which depending on 00664 * the compiler's handling of dynamic_cast to a private super class may 00665 * fail to be castable to an Array<octet_t>. 00666 * 00667 * @return the Array<octet_t> 00668 */ 00669 virtual Array<octet_t>::View getOctets() const; 00670 00671 /** 00672 * Return an STL string containing the same contents as this String. 00673 * 00674 * @return a new STL string. 00675 */ 00676 virtual operator std::string() const; 00677 00678 /** 00679 * Return an STL wstring containing the same contents as this String. 00680 * 00681 * @return a new STL wstring. 00682 * 00683 * @throws UnsupportedOperationException if sizeof(wchar_t) < sizeof(char16_t) 00684 */ 00685 virtual operator std::wstring() const; 00686 00687 00688 // ----- Comparable interface ------------------------------------------- 00689 00690 public: 00691 /** 00692 * {@inheritDoc} 00693 */ 00694 virtual int32_t compareTo(Object::View v) const; 00695 00696 00697 // ----- Object interface ----------------------------------------------- 00698 00699 public: 00700 /** 00701 * {@inheritDoc} 00702 */ 00703 virtual size32_t hashCode() const; 00704 00705 /** 00706 * {@inheritDoc} 00707 */ 00708 virtual void toStream(std::ostream& out) const; 00709 00710 /** 00711 * {@inheritDoc} 00712 */ 00713 virtual bool isImmutable() const; 00714 00715 /** 00716 * {@inheritDoc} 00717 */ 00718 virtual bool equals(Object::View v) const; 00719 00720 /** 00721 * {@inheritDoc} 00722 */ 00723 virtual size32_t sizeOf() const; 00724 00725 00726 // ----- static helpers ------------------------------------------------- 00727 00728 public: 00729 /** 00730 * Return the Unicode character as UTF-16 from the char array, and 00731 * increment the pointer such that it references the start of the 00732 * next Unicode character. 00733 * 00734 * @param ach pointer to the start of the next UTF-8 code point. 00735 * 00736 * @return the next Unicode character 00737 * 00738 * @throws IllegalArgumentException if a non UTF-8 BMP sequence is 00739 * encountered 00740 */ 00741 static char16_t next(const char*& ach); 00742 00743 00744 // ----- data members --------------------------------------------------- 00745 00746 protected: 00747 /** 00748 * The number of unicode code points (characters) in the String. 00749 */ 00750 size32_t m_ccp; 00751 00752 00753 // ----- constants ------------------------------------------------------ 00754 00755 public: 00756 /** 00757 * String referncing NULL. 00758 */ 00759 static const String::Handle NULL_STRING; 00760 }; 00761 00762 00763 // ----- helper macros ------------------------------------------------------ 00764 00765 /** 00766 * This macro will take any set of streamable contents and turn them into a 00767 * coherence#lang#String instance. 00768 * 00769 * @param CONTENTS the contents to use in constructing the String. 00770 * 00771 * Usage example: 00772 * @code 00773 * String::Handle hsFoo = COH_TO_STRING("This value: " << 5 << " is my value"); 00774 * @endcode 00775 */ 00776 #define COH_TO_STRING(CONTENTS) \ 00777 coherence::lang::String::create(((std::stringstream&) \ 00778 (*(std::auto_ptr<std::stringstream>(new std::stringstream())) \ 00779 << CONTENTS)).str()) 00780 00781 COH_CLOSE_NAMESPACE2 00782 00783 #endif // COH_STRING_HPP