1 /*************************************************************************** 2 copyright : (C) 2002 - 2008 by Scott Wheeler 3 email : [email protected] 4 ***************************************************************************/ 5 6 /*************************************************************************** 7 * This library is free software; you can redistribute it and/or modify * 8 * it under the terms of the GNU Lesser General Public License version * 9 * 2.1 as published by the Free Software Foundation. * 10 * * 11 * This library is distributed in the hope that it will be useful, but * 12 * WITHOUT ANY WARRANTY; without even the implied warranty of * 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * 14 * Lesser General Public License for more details. * 15 * * 16 * You should have received a copy of the GNU Lesser General Public * 17 * License along with this library; if not, write to the Free Software * 18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA * 19 * 02110-1301 USA * 20 * * 21 * Alternatively, this file is available under the Mozilla Public * 22 * License Version 1.1. You may obtain a copy of the License at * 23 * http://www.mozilla.org/MPL/ * 24 ***************************************************************************/ 25 26 #ifndef TAGLIB_STRING_H 27 #define TAGLIB_STRING_H 28 29 #include "taglib_export.h" 30 #include "taglib.h" 31 #include "tbytevector.h" 32 33 #include <string> 34 #include <iostream> 35 36 /*! 37 * \relates TagLib::String 38 * 39 * Converts a QString to a TagLib::String without a requirement to link to Qt. 40 * 41 * \note consider conversion via usual char-by-char for loop to avoid UTF16->UTF8->UTF16 42 * conversion happening in the background 43 */ 44 45 #if defined(QT_VERSION) && (QT_VERSION >= 0x040000) 46 #define QStringToTString(s) TagLib::String(s.toUtf8().data(), TagLib::String::UTF8) 47 #else 48 #define QStringToTString(s) TagLib::String(s.utf8().data(), TagLib::String::UTF8) 49 #endif 50 51 /*! 52 * \relates TagLib::String 53 * 54 * Converts a TagLib::String to a QString without a requirement to link to Qt. 55 * 56 * \note consider conversion via usual char-by-char for loop to avoid UTF16->UTF8->UTF16 57 * conversion happening in the background 58 * 59 */ 60 61 #define TStringToQString(s) QString::fromUtf8(s.toCString(true)) 62 63 namespace TagLib { 64 65 class StringList; 66 67 //! A \e wide string class suitable for unicode. 68 69 /*! 70 * This is an implicitly shared \e wide string. For storage it uses 71 * TagLib::wstring, but as this is an <i>implementation detail</i> this of 72 * course could change. Strings are stored internally as UTF-16(without BOM/ 73 * CPU byte order) 74 * 75 * The use of implicit sharing means that copying a string is cheap, the only 76 * \e cost comes into play when the copy is modified. Prior to that the string 77 * just has a pointer to the data of the \e parent String. This also makes 78 * this class suitable as a function return type. 79 * 80 * In addition to adding implicit sharing, this class keeps track of four 81 * possible encodings, which are the four supported by the ID3v2 standard. 82 */ 83 84 class TAGLIB_EXPORT String 85 { 86 public: 87 88 #ifndef DO_NOT_DOCUMENT 89 typedef TagLib::wstring::iterator Iterator; 90 typedef TagLib::wstring::const_iterator ConstIterator; 91 #endif 92 93 /** 94 * The four types of string encodings supported by the ID3v2 specification. 95 * ID3v1 is assumed to be Latin1 and Ogg Vorbis comments use UTF8. 96 */ 97 enum Type { 98 /*! 99 * IS08859-1, or <i>Latin1</i> encoding. 8 bit characters. 100 */ 101 Latin1 = 0, 102 /*! 103 * UTF16 with a <i>byte order mark</i>. 16 bit characters. 104 */ 105 UTF16 = 1, 106 /*! 107 * UTF16 <i>big endian</i>. 16 bit characters. This is the encoding used 108 * internally by TagLib. 109 */ 110 UTF16BE = 2, 111 /*! 112 * UTF8 encoding. Characters are usually 8 bits but can be up to 32. 113 */ 114 UTF8 = 3, 115 /*! 116 * UTF16 <i>little endian</i>. 16 bit characters. 117 */ 118 UTF16LE = 4 119 }; 120 121 /*! 122 * Constructs an empty String. 123 */ 124 String(); 125 126 /*! 127 * Make a shallow, implicitly shared, copy of \a s. Because this is 128 * implicitly shared, this method is lightweight and suitable for 129 * pass-by-value usage. 130 */ 131 String(const String &s); 132 133 /*! 134 * Makes a deep copy of the data in \a s. 135 * 136 * \note This should only be used with the 8-bit codecs Latin1 and UTF8, when 137 * used with other codecs it will simply print a warning and exit. 138 */ 139 String(const std::string &s, Type t = Latin1); 140 141 /*! 142 * Makes a deep copy of the data in \a s. 143 * 144 * /note If \a t is UTF16LE, the byte order of \a s will be swapped regardless 145 * of the CPU byte order. If UTF16BE, it will not be swapped. This behavior 146 * will be changed in TagLib2.0. 147 */ 148 String(const wstring &s, Type t = UTF16BE); 149 150 /*! 151 * Makes a deep copy of the data in \a s. 152 * 153 * /note If \a t is UTF16LE, the byte order of \a s will be swapped regardless 154 * of the CPU byte order. If UTF16BE, it will not be swapped. This behavior 155 * will be changed in TagLib2.0. 156 */ 157 String(const wchar_t *s, Type t = UTF16BE); 158 159 /*! 160 * Makes a deep copy of the data in \a c. 161 * 162 * \note This should only be used with the 8-bit codecs Latin1 and UTF8, when 163 * used with other codecs it will simply print a warning and exit. 164 */ 165 String(char c, Type t = Latin1); 166 167 /*! 168 * Makes a deep copy of the data in \a c. 169 */ 170 String(wchar_t c, Type t = Latin1); 171 172 /*! 173 * Makes a deep copy of the data in \a s. 174 * 175 * \note This should only be used with the 8-bit codecs Latin1 and UTF8, when 176 * used with other codecs it will simply print a warning and exit. 177 */ 178 String(const char *s, Type t = Latin1); 179 180 /*! 181 * Makes a deep copy of the data in \a v. 182 */ 183 String(const ByteVector &v, Type t = Latin1); 184 185 /*! 186 * Destroys this String instance. 187 */ 188 virtual ~String(); 189 190 /*! 191 * Returns a deep copy of this String as an std::string. The returned string 192 * is encoded in UTF8 if \a unicode is true, otherwise Latin1. 193 * 194 * \see toCString() 195 */ 196 std::string to8Bit(bool unicode = false) const; 197 198 /*! 199 * Returns a deep copy of this String as a wstring. The returned string is 200 * encoded in UTF-16 (without BOM/CPU byte order), not UTF-32 even if wchar_t 201 * is 32-bit wide. 202 * 203 * \see toCWString() 204 */ 205 wstring toWString() const; 206 207 /*! 208 * Creates and returns a standard C-style (null-terminated) version of this 209 * String. The returned string is encoded in UTF8 if \a unicode is true, 210 * otherwise Latin1. 211 * 212 * The returned string is still owned by this String and should not be deleted 213 * by the user. 214 * 215 * The returned pointer remains valid until this String instance is destroyed 216 * or toCString() is called again. 217 * 218 * \warning This however has the side effect that the returned string will remain 219 * in memory <b>in addition to</b> other memory that is consumed by this 220 * String instance. So, this method should not be used on large strings or 221 * where memory is critical. Consider using to8Bit() instead to avoid it. 222 * 223 * \see to8Bit() 224 */ 225 const char *toCString(bool unicode = false) const; 226 227 /*! 228 * Returns a standard C-style (null-terminated) wide character version of 229 * this String. The returned string is encoded in UTF-16 (without BOM/CPU byte 230 * order), not UTF-32 even if wchar_t is 32-bit wide. 231 * 232 * The returned string is still owned by this String and should not be deleted 233 * by the user. 234 * 235 * The returned pointer remains valid until this String instance is destroyed 236 * or any other method of this String is called. 237 * 238 * \note This returns a pointer to the String's internal data without any 239 * conversions. 240 * 241 * \see toWString() 242 */ 243 const wchar_t *toCWString() const; 244 245 /*! 246 * Returns an iterator pointing to the beginning of the string. 247 */ 248 Iterator begin(); 249 250 /*! 251 * Returns a const iterator pointing to the beginning of the string. 252 */ 253 ConstIterator begin() const; 254 255 /*! 256 * Returns an iterator pointing to the end of the string (the position 257 * after the last character). 258 */ 259 Iterator end(); 260 261 /*! 262 * Returns a const iterator pointing to the end of the string (the position 263 * after the last character). 264 */ 265 ConstIterator end() const; 266 267 /*! 268 * Finds the first occurrence of pattern \a s in this string starting from 269 * \a offset. If the pattern is not found, -1 is returned. 270 */ 271 int find(const String &s, int offset = 0) const; 272 273 /*! 274 * Finds the last occurrence of pattern \a s in this string, searched backwards, 275 * either from the end of the string or starting from \a offset. If the pattern 276 * is not found, -1 is returned. 277 */ 278 int rfind(const String &s, int offset = -1) const; 279 280 /*! 281 * Splits the string on each occurrence of \a separator. 282 */ 283 StringList split(const String &separator = " ") const; 284 285 /*! 286 * Returns true if the strings starts with the substring \a s. 287 */ 288 bool startsWith(const String &s) const; 289 290 /*! 291 * Extract a substring from this string starting at \a position and 292 * continuing for \a n characters. 293 */ 294 String substr(unsigned int position, unsigned int n = 0xffffffff) const; 295 296 /*! 297 * Append \a s to the current string and return a reference to the current 298 * string. 299 */ 300 String &append(const String &s); 301 302 /*! 303 * Clears the string. 304 */ 305 String &clear(); 306 307 /*! 308 * Returns an upper case version of the string. 309 * 310 * \warning This only works for the characters in US-ASCII, i.e. A-Z. 311 */ 312 String upper() const; 313 314 /*! 315 * Returns the size of the string. 316 */ 317 unsigned int size() const; 318 319 /*! 320 * Returns the length of the string. Equivalent to size(). 321 */ 322 unsigned int length() const; 323 324 /*! 325 * Returns true if the string is empty. 326 * 327 * \see isNull() 328 */ 329 bool isEmpty() const; 330 331 /*! 332 * Returns true if this string is null -- i.e. it is a copy of the 333 * String::null string. 334 * 335 * \note A string can be empty and not null. So do not use this method to 336 * check if the string is empty. 337 * 338 * \see isEmpty() 339 * 340 * \deprecated 341 */ 342 // BIC: remove 343 TAGLIB_DEPRECATED bool isNull() const; 344 345 /*! 346 * Returns a ByteVector containing the string's data. If \a t is Latin1 or 347 * UTF8, this will return a vector of 8 bit characters, otherwise it will use 348 * 16 bit characters. 349 * 350 * \note If \a t is UTF16, the returned data is encoded in little-endian 351 * format and has a BOM. 352 * 353 * \note The returned data is not null terminated. 354 */ 355 ByteVector data(Type t) const; 356 357 /*! 358 * Convert the string to an integer. 359 * 360 * Returns the integer if the conversion was successful or 0 if the 361 * string does not represent a number. 362 */ 363 // BIC: merge with the method below 364 int toInt() const; 365 366 /*! 367 * Convert the string to an integer. 368 * 369 * If the conversion was successful, it sets the value of \a *ok to 370 * true and returns the integer. Otherwise it sets \a *ok to false 371 * and the result is undefined. 372 */ 373 int toInt(bool *ok) const; 374 375 /*! 376 * Returns a string with the leading and trailing whitespace stripped. 377 */ 378 String stripWhiteSpace() const; 379 380 /*! 381 * Returns true if the file only uses characters required by Latin1. 382 */ 383 bool isLatin1() const; 384 385 /*! 386 * Returns true if the file only uses characters required by (7-bit) ASCII. 387 */ 388 bool isAscii() const; 389 390 /*! 391 * Converts the base-10 integer \a n to a string. 392 */ 393 static String number(int n); 394 395 /*! 396 * Returns a reference to the character at position \a i. 397 */ 398 wchar_t &operator[](int i); 399 400 /*! 401 * Returns a const reference to the character at position \a i. 402 */ 403 const wchar_t &operator[](int i) const; 404 405 /*! 406 * Compares each character of the String with each character of \a s and 407 * returns true if the strings match. 408 */ 409 bool operator==(const String &s) const; 410 411 /*! 412 * Compares each character of the String with each character of \a s and 413 * returns false if the strings match. 414 */ 415 bool operator!=(const String &s) const; 416 417 /*! 418 * Compares each character of the String with each character of \a s and 419 * returns true if the strings match. 420 */ 421 bool operator==(const char *s) const; 422 423 /*! 424 * Compares each character of the String with each character of \a s and 425 * returns false if the strings match. 426 */ 427 bool operator!=(const char *s) const; 428 429 /*! 430 * Compares each character of the String with each character of \a s and 431 * returns true if the strings match. 432 */ 433 bool operator==(const wchar_t *s) const; 434 435 /*! 436 * Compares each character of the String with each character of \a s and 437 * returns false if the strings match. 438 */ 439 bool operator!=(const wchar_t *s) const; 440 441 /*! 442 * Appends \a s to the end of the String. 443 */ 444 String &operator+=(const String &s); 445 446 /*! 447 * Appends \a s to the end of the String. 448 */ 449 String &operator+=(const wchar_t* s); 450 451 /*! 452 * Appends \a s to the end of the String. 453 */ 454 String &operator+=(const char* s); 455 456 /*! 457 * Appends \a s to the end of the String. 458 */ 459 String &operator+=(wchar_t c); 460 461 /*! 462 * Appends \a c to the end of the String. 463 */ 464 String &operator+=(char c); 465 466 /*! 467 * Performs a shallow, implicitly shared, copy of \a s, overwriting the 468 * String's current data. 469 */ 470 String &operator=(const String &s); 471 472 /*! 473 * Performs a deep copy of the data in \a s. 474 */ 475 String &operator=(const std::string &s); 476 477 /*! 478 * Performs a deep copy of the data in \a s. 479 */ 480 String &operator=(const wstring &s); 481 482 /*! 483 * Performs a deep copy of the data in \a s. 484 */ 485 String &operator=(const wchar_t *s); 486 487 /*! 488 * Performs a deep copy of the data in \a s. 489 */ 490 String &operator=(char c); 491 492 /*! 493 * Performs a deep copy of the data in \a s. 494 */ 495 String &operator=(wchar_t c); 496 497 /*! 498 * Performs a deep copy of the data in \a s. 499 */ 500 String &operator=(const char *s); 501 502 /*! 503 * Performs a deep copy of the data in \a v. 504 */ 505 String &operator=(const ByteVector &v); 506 507 /*! 508 * Exchanges the content of the String by the content of \a s. 509 */ 510 void swap(String &s); 511 512 /*! 513 * To be able to use this class in a Map, this operator needed to be 514 * implemented. Returns true if \a s is less than this string in a byte-wise 515 * comparison. 516 */ 517 bool operator<(const String &s) const; 518 519 /*! 520 * A null string provided for convenience. 521 * 522 * \warning Do not modify this variable. It will mess up the internal state 523 * of TagLib. 524 * 525 * \deprecated 526 */ 527 // BIC: remove 528 TAGLIB_DEPRECATED static String null; 529 530 protected: 531 /*! 532 * If this String is being shared via implicit sharing, do a deep copy of the 533 * data and separate from the shared members. This should be called by all 534 * non-const subclass members. 535 */ 536 void detach(); 537 538 private: 539 /*! 540 * \deprecated This variable is no longer used, but NEVER remove this. It 541 * may lead to a linkage error. 542 */ 543 // BIC: remove 544 TAGLIB_DEPRECATED static const Type WCharByteOrder; 545 546 class StringPrivate; 547 StringPrivate *d; 548 }; 549 } 550 551 /*! 552 * \relates TagLib::String 553 * 554 * Concatenates \a s1 and \a s2 and returns the result as a string. 555 */ 556 TAGLIB_EXPORT const TagLib::String operator+(const TagLib::String &s1, const TagLib::String &s2); 557 558 /*! 559 * \relates TagLib::String 560 * 561 * Concatenates \a s1 and \a s2 and returns the result as a string. 562 */ 563 TAGLIB_EXPORT const TagLib::String operator+(const char *s1, const TagLib::String &s2); 564 565 /*! 566 * \relates TagLib::String 567 * 568 * Concatenates \a s1 and \a s2 and returns the result as a string. 569 */ 570 TAGLIB_EXPORT const TagLib::String operator+(const TagLib::String &s1, const char *s2); 571 572 573 /*! 574 * \relates TagLib::String 575 * 576 * Send the string to an output stream. 577 */ 578 TAGLIB_EXPORT std::ostream &operator<<(std::ostream &s, const TagLib::String &str); 579 580 #endif 581