xref: /MusicPlayer2/MusicPlayer2/taglib/tstring.h (revision 2661106a96494c0a7dfab38bf1ae7b9565882443)
1 /***************************************************************************
2     copyright            : (C) 2002 - 2008 by Scott Wheeler
3     email                : [email protected]
4  ***************************************************************************/
5 
6 /***************************************************************************
7  *   This library is free software; you can redistribute it and/or modify  *
8  *   it under the terms of the GNU Lesser General Public License version   *
9  *   2.1 as published by the Free Software Foundation.                     *
10  *                                                                         *
11  *   This library is distributed in the hope that it will be useful, but   *
12  *   WITHOUT ANY WARRANTY; without even the implied warranty of            *
13  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU     *
14  *   Lesser General Public License for more details.                       *
15  *                                                                         *
16  *   You should have received a copy of the GNU Lesser General Public      *
17  *   License along with this library; if not, write to the Free Software   *
18  *   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA         *
19  *   02110-1301  USA                                                       *
20  *                                                                         *
21  *   Alternatively, this file is available under the Mozilla Public        *
22  *   License Version 1.1.  You may obtain a copy of the License at         *
23  *   http://www.mozilla.org/MPL/                                           *
24  ***************************************************************************/
25 
26 #ifndef TAGLIB_STRING_H
27 #define TAGLIB_STRING_H
28 
29 #include "taglib_export.h"
30 #include "taglib.h"
31 #include "tbytevector.h"
32 
33 #include <string>
34 #include <iostream>
35 
/*!
 * \relates TagLib::String
 *
 * Converts a QString to a TagLib::String without a requirement to link to Qt.
 *
 * The macro argument is parenthesized in the expansion, so any expression
 * that yields a QString (for example <tt>a + b</tt>) may be passed, not only
 * a plain identifier.  When QT_VERSION is defined and is at least 0x040000
 * the conversion goes through toUtf8(); otherwise it goes through utf8().
 *
 * \note consider conversion via usual char-by-char for loop to avoid UTF16->UTF8->UTF16
 * conversion happening in the background
 */

#if defined(QT_VERSION) && (QT_VERSION >= 0x040000)
#define QStringToTString(s) TagLib::String((s).toUtf8().data(), TagLib::String::UTF8)
#else
#define QStringToTString(s) TagLib::String((s).utf8().data(), TagLib::String::UTF8)
#endif
50 
/*!
 * \relates TagLib::String
 *
 * Converts a TagLib::String to a QString without a requirement to link to Qt.
 *
 * The macro argument is parenthesized in the expansion, so any expression
 * that yields a TagLib::String (for example <tt>a + b</tt>) may be passed,
 * not only a plain identifier.  The string is handed to QString::fromUtf8()
 * via toCString(true).
 *
 * \note consider conversion via usual char-by-char for loop to avoid UTF16->UTF8->UTF16
 * conversion happening in the background
 *
 */

#define TStringToQString(s) QString::fromUtf8((s).toCString(true))
62 
63 namespace TagLib {
64 
65   class StringList;
66 
67   //! A \e wide string class suitable for unicode.
68 
69   /*!
70    * This is an implicitly shared \e wide string.  For storage it uses
71    * TagLib::wstring, but as this is an <i>implementation detail</i> this of
72    * course could change.  Strings are stored internally as UTF-16 (without BOM/
73    * CPU byte order).
74    *
75    * The use of implicit sharing means that copying a string is cheap, the only
76    * \e cost comes into play when the copy is modified.  Prior to that the string
77    * just has a pointer to the data of the \e parent String.  This also makes
78    * this class suitable as a function return type.
79    *
80    * In addition to adding implicit sharing, this class keeps track of four
81    * possible encodings, which are the four supported by the ID3v2 standard.
82    */
83 
84   class TAGLIB_EXPORT String
85   {
86   public:
87 
88 #ifndef DO_NOT_DOCUMENT
89     typedef TagLib::wstring::iterator Iterator;
90     typedef TagLib::wstring::const_iterator ConstIterator;
91 #endif
92 
93     /**
94      * The four types of string encodings supported by the ID3v2 specification.
95      * ID3v1 is assumed to be Latin1 and Ogg Vorbis comments use UTF8.
96      */
97     enum Type {
98       /*!
99        * ISO-8859-1, or <i>Latin1</i> encoding.  8 bit characters.
100        */
101       Latin1 = 0,
102       /*!
103        * UTF16 with a <i>byte order mark</i>.  16 bit characters.
104        */
105       UTF16 = 1,
106       /*!
107        * UTF16 <i>big endian</i>.  16 bit characters.  This is the encoding used
108        * internally by TagLib.
109        */
110       UTF16BE = 2,
111       /*!
112        * UTF8 encoding.  Characters are usually 8 bits but can be up to 32.
113        */
114       UTF8 = 3,
115       /*!
116        * UTF16 <i>little endian</i>.  16 bit characters.
117        */
118       UTF16LE = 4
119     };
120 
121     /*!
122      * Constructs an empty String.
123      */
124     String();
125 
126     /*!
127      * Make a shallow, implicitly shared, copy of \a s.  Because this is
128      * implicitly shared, this method is lightweight and suitable for
129      * pass-by-value usage.
130      */
131     String(const String &s);
132 
133     /*!
134      * Makes a deep copy of the data in \a s.
135      *
136      * \note This should only be used with the 8-bit codecs Latin1 and UTF8, when
137      * used with other codecs it will simply print a warning and exit.
138      */
139     String(const std::string &s, Type t = Latin1);
140 
141     /*!
142      * Makes a deep copy of the data in \a s.
143      *
144      * \note If \a t is UTF16LE, the byte order of \a s will be swapped regardless
145      * of the CPU byte order.  If UTF16BE, it will not be swapped.  This behavior
146      * will be changed in TagLib2.0.
147      */
148     String(const wstring &s, Type t = UTF16BE);
149 
150     /*!
151      * Makes a deep copy of the data in \a s.
152      *
153      * \note If \a t is UTF16LE, the byte order of \a s will be swapped regardless
154      * of the CPU byte order.  If UTF16BE, it will not be swapped.  This behavior
155      * will be changed in TagLib2.0.
156      */
157     String(const wchar_t *s, Type t = UTF16BE);
158 
159     /*!
160      * Makes a deep copy of the data in \a c.
161      *
162      * \note This should only be used with the 8-bit codecs Latin1 and UTF8, when
163      * used with other codecs it will simply print a warning and exit.
164      */
165     String(char c, Type t = Latin1);
166 
167     /*!
168      * Makes a deep copy of the data in \a c.
169      */
170     String(wchar_t c, Type t = Latin1);
171 
172     /*!
173      * Makes a deep copy of the data in \a s.
174      *
175      * \note This should only be used with the 8-bit codecs Latin1 and UTF8, when
176      * used with other codecs it will simply print a warning and exit.
177      */
178     String(const char *s, Type t = Latin1);
179 
180     /*!
181      * Makes a deep copy of the data in \a v.
182      */
183     String(const ByteVector &v, Type t = Latin1);
184 
185     /*!
186      * Destroys this String instance.
187      */
188     virtual ~String();
189 
190     /*!
191      * Returns a deep copy of this String as an std::string.  The returned string
192      * is encoded in UTF8 if \a unicode is true, otherwise Latin1.
193      *
194      * \see toCString()
195      */
196     std::string to8Bit(bool unicode = false) const;
197 
198     /*!
199      * Returns a deep copy of this String as a wstring.  The returned string is
200      * encoded in UTF-16 (without BOM/CPU byte order), not UTF-32 even if wchar_t
201      * is 32-bit wide.
202      *
203      * \see toCWString()
204      */
205     wstring toWString() const;
206 
207     /*!
208      * Creates and returns a standard C-style (null-terminated) version of this
209      * String.  The returned string is encoded in UTF8 if \a unicode is true,
210      * otherwise Latin1.
211      *
212      * The returned string is still owned by this String and should not be deleted
213      * by the user.
214      *
215      * The returned pointer remains valid until this String instance is destroyed
216      * or toCString() is called again.
217      *
218      * \warning This however has the side effect that the returned string will remain
219      * in memory <b>in addition to</b> other memory that is consumed by this
220      * String instance.  So, this method should not be used on large strings or
221      * where memory is critical.  Consider using to8Bit() instead to avoid it.
222      *
223      * \see to8Bit()
224      */
225     const char *toCString(bool unicode = false) const;
226 
227     /*!
228      * Returns a standard C-style (null-terminated) wide character version of
229      * this String.  The returned string is encoded in UTF-16 (without BOM/CPU byte
230      * order), not UTF-32 even if wchar_t is 32-bit wide.
231      *
232      * The returned string is still owned by this String and should not be deleted
233      * by the user.
234      *
235      * The returned pointer remains valid until this String instance is destroyed
236      * or any other method of this String is called.
237      *
238      * \note This returns a pointer to the String's internal data without any
239      * conversions.
240      *
241      * \see toWString()
242      */
243     const wchar_t *toCWString() const;
244 
245     /*!
246      * Returns an iterator pointing to the beginning of the string.
247      */
248     Iterator begin();
249 
250     /*!
251      * Returns a const iterator pointing to the beginning of the string.
252      */
253     ConstIterator begin() const;
254 
255     /*!
256      * Returns an iterator pointing to the end of the string (the position
257      * after the last character).
258      */
259     Iterator end();
260 
261     /*!
262      * Returns a const iterator pointing to the end of the string (the position
263      * after the last character).
264      */
265     ConstIterator end() const;
266 
267     /*!
268      * Finds the first occurrence of pattern \a s in this string starting from
269      * \a offset.  If the pattern is not found, -1 is returned.
270      */
271     int find(const String &s, int offset = 0) const;
272 
273     /*!
274      * Finds the last occurrence of pattern \a s in this string, searched backwards,
275      * either from the end of the string or starting from \a offset. If the pattern
276      * is not found, -1 is returned.
277      */
278     int rfind(const String &s, int offset = -1) const;
279 
280     /*!
281      * Splits the string on each occurrence of \a separator.
282      */
283     StringList split(const String &separator = " ") const;
284 
285     /*!
286      * Returns true if the string starts with the substring \a s.
287      */
288     bool startsWith(const String &s) const;
289 
290     /*!
291      * Extract a substring from this string starting at \a position and
292      * continuing for \a n characters.
293      */
294     String substr(unsigned int position, unsigned int n = 0xffffffff) const;
295 
296     /*!
297      * Append \a s to the current string and return a reference to the current
298      * string.
299      */
300     String &append(const String &s);
301 
302     /*!
303      * Clears the string.
304      */
305     String &clear();
306 
307     /*!
308      * Returns an upper case version of the string.
309      *
310      * \warning This only works for the characters in US-ASCII, i.e. A-Z.
311      */
312     String upper() const;
313 
314     /*!
315      * Returns the size of the string.
316      */
317     unsigned int size() const;
318 
319     /*!
320      * Returns the length of the string.  Equivalent to size().
321      */
322     unsigned int length() const;
323 
324     /*!
325      * Returns true if the string is empty.
326      *
327      * \see isNull()
328      */
329     bool isEmpty() const;
330 
331     /*!
332      * Returns true if this string is null -- i.e. it is a copy of the
333      * String::null string.
334      *
335      * \note A string can be empty and not null.  So do not use this method to
336      * check if the string is empty.
337      *
338      * \see isEmpty()
339      *
340      * \deprecated
341      */
342      // BIC: remove
343     TAGLIB_DEPRECATED bool isNull() const;
344 
345     /*!
346      * Returns a ByteVector containing the string's data.  If \a t is Latin1 or
347      * UTF8, this will return a vector of 8 bit characters, otherwise it will use
348      * 16 bit characters.
349      *
350      * \note If \a t is UTF16, the returned data is encoded in little-endian
351      * format and has a BOM.
352      *
353      * \note The returned data is not null terminated.
354      */
355     ByteVector data(Type t) const;
356 
357     /*!
358      * Convert the string to an integer.
359      *
360      * Returns the integer if the conversion was successful or 0 if the
361      * string does not represent a number.
362      */
363     // BIC: merge with the method below
364     int toInt() const;
365 
366     /*!
367      * Convert the string to an integer.
368      *
369      * If the conversion was successful, it sets the value of \a *ok to
370      * true and returns the integer. Otherwise it sets \a *ok to false
371      * and the result is undefined.
372      */
373     int toInt(bool *ok) const;
374 
375     /*!
376      * Returns a string with the leading and trailing whitespace stripped.
377      */
378     String stripWhiteSpace() const;
379 
380     /*!
381      * Returns true if the string only uses characters required by Latin1.
382      */
383     bool isLatin1() const;
384 
385     /*!
386      * Returns true if the string only uses characters required by (7-bit) ASCII.
387      */
388     bool isAscii() const;
389 
390     /*!
391      * Converts the base-10 integer \a n to a string.
392      */
393     static String number(int n);
394 
395     /*!
396      * Returns a reference to the character at position \a i.
397      */
398     wchar_t &operator[](int i);
399 
400     /*!
401      * Returns a const reference to the character at position \a i.
402      */
403     const wchar_t &operator[](int i) const;
404 
405     /*!
406      * Compares each character of the String with each character of \a s and
407      * returns true if the strings match.
408      */
409     bool operator==(const String &s) const;
410 
411     /*!
412      * Compares each character of the String with each character of \a s and
413      * returns false if the strings match.
414      */
415     bool operator!=(const String &s) const;
416 
417     /*!
418      * Compares each character of the String with each character of \a s and
419      * returns true if the strings match.
420      */
421     bool operator==(const char *s) const;
422 
423     /*!
424      * Compares each character of the String with each character of \a s and
425      * returns false if the strings match.
426      */
427     bool operator!=(const char *s) const;
428 
429     /*!
430      * Compares each character of the String with each character of \a s and
431      * returns true if the strings match.
432      */
433     bool operator==(const wchar_t *s) const;
434 
435     /*!
436      * Compares each character of the String with each character of \a s and
437      * returns false if the strings match.
438      */
439     bool operator!=(const wchar_t *s) const;
440 
441     /*!
442      * Appends \a s to the end of the String.
443      */
444     String &operator+=(const String &s);
445 
446     /*!
447      * Appends \a s to the end of the String.
448      */
449     String &operator+=(const wchar_t* s);
450 
451     /*!
452      * Appends \a s to the end of the String.
453      */
454     String &operator+=(const char* s);
455 
456     /*!
457      * Appends \a c to the end of the String.
458      */
459     String &operator+=(wchar_t c);
460 
461     /*!
462      * Appends \a c to the end of the String.
463      */
464     String &operator+=(char c);
465 
466     /*!
467      * Performs a shallow, implicitly shared, copy of \a s, overwriting the
468      * String's current data.
469      */
470     String &operator=(const String &s);
471 
472     /*!
473      * Performs a deep copy of the data in \a s.
474      */
475     String &operator=(const std::string &s);
476 
477     /*!
478      * Performs a deep copy of the data in \a s.
479      */
480     String &operator=(const wstring &s);
481 
482     /*!
483      * Performs a deep copy of the data in \a s.
484      */
485     String &operator=(const wchar_t *s);
486 
487     /*!
488      * Performs a deep copy of the data in \a c.
489      */
490     String &operator=(char c);
491 
492     /*!
493      * Performs a deep copy of the data in \a c.
494      */
495     String &operator=(wchar_t c);
496 
497     /*!
498      * Performs a deep copy of the data in \a s.
499      */
500     String &operator=(const char *s);
501 
502     /*!
503      * Performs a deep copy of the data in \a v.
504      */
505     String &operator=(const ByteVector &v);
506 
507     /*!
508      * Exchanges the content of the String by the content of \a s.
509      */
510     void swap(String &s);
511 
512     /*!
513      * To be able to use this class in a Map, this operator needed to be
514      * implemented.  Returns true if \a s is less than this string in a byte-wise
515      * comparison.
516      */
517     bool operator<(const String &s) const;
518 
519     /*!
520      * A null string provided for convenience.
521      *
522      * \warning Do not modify this variable.  It will mess up the internal state
523      * of TagLib.
524      *
525      * \deprecated
526      */
527      // BIC: remove
528     TAGLIB_DEPRECATED static String null;
529 
530   protected:
531     /*!
532      * If this String is being shared via implicit sharing, do a deep copy of the
533      * data and separate from the shared members.  This should be called by all
534      * non-const subclass members.
535      */
536     void detach();
537 
538   private:
539     /*!
540      * \deprecated This variable is no longer used, but NEVER remove this. It
541      * may lead to a linkage error.
542      */
543      // BIC: remove
544     TAGLIB_DEPRECATED static const Type WCharByteOrder;
545 
546     class StringPrivate;
547     StringPrivate *d;
548   };
549 }
550 
551 /*!
552  * \relates TagLib::String
553  *
554  * Concatenates the Strings \a s1 and \a s2 and returns the result as a new String.
555  */
556 TAGLIB_EXPORT const TagLib::String operator+(const TagLib::String &s1, const TagLib::String &s2);
557 
558 /*!
559  * \relates TagLib::String
560  *
561  * Concatenates the C-style string \a s1 and the String \a s2 and returns the result as a new String.
562  */
563 TAGLIB_EXPORT const TagLib::String operator+(const char *s1, const TagLib::String &s2);
564 
565 /*!
566  * \relates TagLib::String
567  *
568  * Concatenates the String \a s1 and the C-style string \a s2 and returns the result as a new String.
569  */
570 TAGLIB_EXPORT const TagLib::String operator+(const TagLib::String &s1, const char *s2);
571 
572 
573 /*!
574  * \relates TagLib::String
575  *
576  * Sends the string \a str to the output stream \a s.
577  */
578 TAGLIB_EXPORT std::ostream &operator<<(std::ostream &s, const TagLib::String &str);
579 
580 #endif
581