1 //===-- ConstString.h -------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLDB_UTILITY_CONSTSTRING_H
10 #define LLDB_UTILITY_CONSTSTRING_H
11 
12 #include "llvm/ADT/DenseMapInfo.h"
13 #include "llvm/ADT/StringRef.h"
14 #include "llvm/Support/FormatVariadic.h"
15 
16 #include <cstddef>
17 #include <string_view>
18 
// Forward declarations. Within this header these two stream types are only
// ever named through a pointer or a reference, so their full definitions are
// not required here.
namespace lldb_private {
class Stream;
}
namespace llvm {
class raw_ostream;
}
25 
26 namespace lldb_private {
27 
28 /// \class ConstString ConstString.h "lldb/Utility/ConstString.h"
29 /// A uniqued constant string class.
30 ///
31 /// Provides an efficient way to store strings as uniqued strings. After the
32 /// strings are uniqued, finding strings that are equal to one another is very
33 /// fast as just the pointers need to be compared. It also allows for many
34 /// common strings from many different sources to be shared to keep the memory
35 /// footprint low.
36 ///
37 /// No reference counting is done on strings that are added to the string
38 /// pool, once strings are added they are in the string pool for the life of
39 /// the program.
40 class ConstString {
41 public:
42   /// Default constructor
43   ///
44   /// Initializes the string to an empty string.
45   ConstString() = default;
46 
47   explicit ConstString(llvm::StringRef s);
48 
49   /// Construct with C String value
50   ///
51   /// Constructs this object with a C string by looking to see if the
52   /// C string already exists in the global string pool. If it doesn't
53   /// exist, it is added to the string pool.
54   ///
55   /// \param[in] cstr
56   ///     A NULL terminated C string to add to the string pool.
57   explicit ConstString(const char *cstr);
58 
59   /// Construct with C String value with max length
60   ///
61   /// Constructs this object with a C string with a length. If \a max_cstr_len
62   /// is greater than the actual length of the string, the string length will
63   /// be truncated. This allows substrings to be created without the need to
64   /// NULL terminate the string as it is passed into this function.
65   ///
66   /// \param[in] cstr
67   ///     A pointer to the first character in the C string. The C
68   ///     string can be NULL terminated in a buffer that contains
69   ///     more characters than the length of the string, or the
70   ///     string can be part of another string and a new substring
71   ///     can be created.
72   ///
73   /// \param[in] max_cstr_len
74   ///     The max length of \a cstr. If the string length of \a cstr
75   ///     is less than \a max_cstr_len, then the string will be
76   ///     truncated. If the string length of \a cstr is greater than
77   ///     \a max_cstr_len, then only max_cstr_len bytes will be used
78   ///     from \a cstr.
79   explicit ConstString(const char *cstr, size_t max_cstr_len);
80 
81   /// Convert to bool operator.
82   ///
83   /// This allows code to check a ConstString object to see if it contains a
84   /// valid string using code such as:
85   ///
86   /// \code
87   /// ConstString str(...);
88   /// if (str)
89   /// { ...
90   /// \endcode
91   ///
92   /// \return
93   ///     /b True this object contains a valid non-empty C string, \b
94   ///     false otherwise.
95   explicit operator bool() const { return !IsEmpty(); }
96 
97   /// Equal to operator
98   ///
99   /// Returns true if this string is equal to the string in \a rhs. This
100   /// operation is very fast as it results in a pointer comparison since all
101   /// strings are in a uniqued in a global string pool.
102   ///
103   /// \param[in] rhs
104   ///     Another string object to compare this object to.
105   ///
106   /// \return
107   ///     true if this object is equal to \a rhs.
108   ///     false if this object is not equal to \a rhs.
109   bool operator==(ConstString rhs) const {
110     // We can do a pointer compare to compare these strings since they must
111     // come from the same pool in order to be equal.
112     return m_string == rhs.m_string;
113   }
114 
115   /// Equal to operator against a non-ConstString value.
116   ///
117   /// Returns true if this string is equal to the string in \a rhs. This
118   /// overload is usually slower than comparing against a ConstString value.
119   /// However, if the rhs string not already a ConstString and it is impractical
120   /// to turn it into a non-temporary variable, then this overload is faster.
121   ///
122   /// \param[in] rhs
123   ///     Another string object to compare this object to.
124   ///
125   /// \return
126   ///     \b true if this object is equal to \a rhs.
127   ///     \b false if this object is not equal to \a rhs.
128   bool operator==(const char *rhs) const {
129     // ConstString differentiates between empty strings and nullptr strings, but
130     // StringRef doesn't. Therefore we have to do this check manually now.
131     if (m_string == nullptr && rhs != nullptr)
132       return false;
133     if (m_string != nullptr && rhs == nullptr)
134       return false;
135 
136     return GetStringRef() == rhs;
137   }
138 
139   /// Not equal to operator
140   ///
141   /// Returns true if this string is not equal to the string in \a rhs. This
142   /// operation is very fast as it results in a pointer comparison since all
143   /// strings are in a uniqued in a global string pool.
144   ///
145   /// \param[in] rhs
146   ///     Another string object to compare this object to.
147   ///
148   /// \return
149   ///     \b true if this object is not equal to \a rhs.
150   ///     \b false if this object is equal to \a rhs.
151   bool operator!=(ConstString rhs) const { return m_string != rhs.m_string; }
152 
153   /// Not equal to operator against a non-ConstString value.
154   ///
155   /// Returns true if this string is not equal to the string in \a rhs. This
156   /// overload is usually slower than comparing against a ConstString value.
157   /// However, if the rhs string not already a ConstString and it is impractical
158   /// to turn it into a non-temporary variable, then this overload is faster.
159   ///
160   /// \param[in] rhs
161   ///     Another string object to compare this object to.
162   ///
163   /// \return \b true if this object is not equal to \a rhs, false otherwise.
164   bool operator!=(const char *rhs) const { return !(*this == rhs); }
165 
166   bool operator<(ConstString rhs) const;
167 
168   // Implicitly convert \class ConstString instances to \class StringRef.
StringRef()169   operator llvm::StringRef() const { return GetStringRef(); }
170 
171   // Explicitly convert \class ConstString instances to \class std::string_view.
string_view()172   explicit operator std::string_view() const {
173     return std::string_view(m_string, GetLength());
174   }
175 
176   // Explicitly convert \class ConstString instances to \class std::string.
string()177   explicit operator std::string() const { return GetString(); }
178 
179   /// Get the string value as a C string.
180   ///
181   /// Get the value of the contained string as a NULL terminated C string
182   /// value.
183   ///
184   /// If \a value_if_empty is nullptr, then nullptr will be returned.
185   ///
186   /// \return Returns \a value_if_empty if the string is empty, otherwise
187   ///     the C string value contained in this object.
188   const char *AsCString(const char *value_if_empty = nullptr) const {
189     return (IsEmpty() ? value_if_empty : m_string);
190   }
191 
192   /// Get the string value as a llvm::StringRef
193   ///
194   /// \return
195   ///     Returns a new llvm::StringRef object filled in with the
196   ///     needed data.
GetStringRef()197   llvm::StringRef GetStringRef() const {
198     return llvm::StringRef(m_string, GetLength());
199   }
200 
201   /// Get the string value as a std::string
GetString()202   std::string GetString() const { return std::string(m_string, GetLength()); }
203 
204   /// Get the string value as a C string.
205   ///
206   /// Get the value of the contained string as a NULL terminated C string
207   /// value. Similar to the ConstString::AsCString() function, yet this
208   /// function will always return nullptr if the string is not valid. So this
209   /// function is a direct accessor to the string pointer value.
210   ///
211   /// \return
212   ///     Returns nullptr the string is invalid, otherwise the C string
213   ///     value contained in this object.
GetCString()214   const char *GetCString() const { return m_string; }
215 
216   /// Get the length in bytes of string value.
217   ///
218   /// The string pool stores the length of the string, so we can avoid calling
219   /// strlen() on the pointer value with this function.
220   ///
221   /// \return
222   ///     Returns the number of bytes that this string occupies in
223   ///     memory, not including the NULL termination byte.
224   size_t GetLength() const;
225 
226   /// Clear this object's state.
227   ///
228   /// Clear any contained string and reset the value to the empty string
229   /// value.
Clear()230   void Clear() { m_string = nullptr; }
231 
232   /// Equal to operator
233   ///
234   /// Returns true if this string is equal to the string in \a rhs. If case
235   /// sensitive equality is tested, this operation is very fast as it results
236   /// in a pointer comparison since all strings are in a uniqued in a global
237   /// string pool.
238   ///
239   /// \param[in] lhs
240   ///     The Left Hand Side const ConstString object reference.
241   ///
242   /// \param[in] rhs
243   ///     The Right Hand Side const ConstString object reference.
244   ///
245   /// \param[in] case_sensitive
246   ///     Case sensitivity. If true, case sensitive equality
247   ///     will be tested, otherwise character case will be ignored
248   ///
249   /// \return \b true if this object is equal to \a rhs, \b false otherwise.
250   static bool Equals(ConstString lhs, ConstString rhs,
251                      const bool case_sensitive = true);
252 
253   /// Compare two string objects.
254   ///
255   /// Compares the C string values contained in \a lhs and \a rhs and returns
256   /// an integer result.
257   ///
258   /// NOTE: only call this function when you want a true string
259   /// comparison. If you want string equality use the, use the == operator as
260   /// it is much more efficient. Also if you want string inequality, use the
261   /// != operator for the same reasons.
262   ///
263   /// \param[in] lhs
264   ///     The Left Hand Side const ConstString object reference.
265   ///
266   /// \param[in] rhs
267   ///     The Right Hand Side const ConstString object reference.
268   ///
269   /// \param[in] case_sensitive
270   ///     Case sensitivity of compare. If true, case sensitive compare
271   ///     will be performed, otherwise character case will be ignored
272   ///
273   /// \return -1 if lhs < rhs, 0 if lhs == rhs, 1 if lhs > rhs
274   static int Compare(ConstString lhs, ConstString rhs,
275                      const bool case_sensitive = true);
276 
277   /// Dump the object description to a stream.
278   ///
279   /// Dump the string value to the stream \a s. If the contained string is
280   /// empty, print \a value_if_empty to the stream instead. If \a
281   /// value_if_empty is nullptr, then nothing will be dumped to the stream.
282   ///
283   /// \param[in] s
284   ///     The stream that will be used to dump the object description.
285   ///
286   /// \param[in] value_if_empty
287   ///     The value to dump if the string is empty. If nullptr, nothing
288   ///     will be output to the stream.
289   void Dump(Stream *s, const char *value_if_empty = nullptr) const;
290 
291   /// Dump the object debug description to a stream.
292   ///
293   /// \param[in] s
294   ///     The stream that will be used to dump the object description.
295   void DumpDebug(Stream *s) const;
296 
297   /// Test for empty string.
298   ///
299   /// \return
300   ///     \b true if the contained string is empty.
301   ///     \b false if the contained string is not empty.
IsEmpty()302   bool IsEmpty() const { return m_string == nullptr || m_string[0] == '\0'; }
303 
304   /// Test for null string.
305   ///
306   /// \return
307   ///     \b true if there is no string associated with this instance.
308   ///     \b false if there is a string associated with this instance.
IsNull()309   bool IsNull() const { return m_string == nullptr; }
310 
311   /// Set the C string value.
312   ///
313   /// Set the string value in the object by uniquing the \a cstr string value
314   /// in our global string pool.
315   ///
316   /// If the C string already exists in the global string pool, it finds the
317   /// current entry and returns the existing value. If it doesn't exist, it is
318   /// added to the string pool.
319   ///
320   /// \param[in] cstr
321   ///     A NULL terminated C string to add to the string pool.
322   void SetCString(const char *cstr);
323 
324   void SetString(llvm::StringRef s);
325 
326   /// Set the C string value and its mangled counterpart.
327   ///
328   /// Object files and debug symbols often use mangled string to represent the
329   /// linkage name for a symbol, function or global. The string pool can
330   /// efficiently store these values and their counterparts so when we run
331   /// into another instance of a mangled name, we can avoid calling the name
332   /// demangler over and over on the same strings and then trying to unique
333   /// them.
334   ///
335   /// \param[in] demangled
336   ///     The demangled string to correlate with the \a mangled name.
337   ///
338   /// \param[in] mangled
339   ///     The already uniqued mangled ConstString to correlate the
340   ///     soon to be uniqued version of \a demangled.
341   void SetStringWithMangledCounterpart(llvm::StringRef demangled,
342                                        ConstString mangled);
343 
344   /// Retrieve the mangled or demangled counterpart for a mangled or demangled
345   /// ConstString.
346   ///
347   /// Object files and debug symbols often use mangled string to represent the
348   /// linkage name for a symbol, function or global. The string pool can
349   /// efficiently store these values and their counterparts so when we run
350   /// into another instance of a mangled name, we can avoid calling the name
351   /// demangler over and over on the same strings and then trying to unique
352   /// them.
353   ///
354   /// \param[in] counterpart
355   ///     A reference to a ConstString object that might get filled in
356   ///     with the demangled/mangled counterpart.
357   ///
358   /// \return
359   ///     /b True if \a counterpart was filled in with the counterpart
360   ///     /b false otherwise.
361   bool GetMangledCounterpart(ConstString &counterpart) const;
362 
363   /// Set the C string value with length.
364   ///
365   /// Set the string value in the object by uniquing \a cstr_len bytes
366   /// starting at the \a cstr string value in our global string pool. If trim
367   /// is true, then \a cstr_len indicates a maximum length of the CString and
368   /// if the actual length of the string is less, then it will be trimmed.
369   ///
370   /// If the C string already exists in the global string pool, it finds the
371   /// current entry and returns the existing value. If it doesn't exist, it is
372   /// added to the string pool.
373   ///
374   /// \param[in] cstr
375   ///     A NULL terminated C string to add to the string pool.
376   ///
377   /// \param[in] cstr_len
378   ///     The maximum length of the C string.
379   void SetCStringWithLength(const char *cstr, size_t cstr_len);
380 
381   /// Set the C string value with the minimum length between \a fixed_cstr_len
382   /// and the actual length of the C string. This can be used for data
383   /// structures that have a fixed length to store a C string where the string
384   /// might not be NULL terminated if the string takes the entire buffer.
385   void SetTrimmedCStringWithLength(const char *cstr, size_t fixed_cstr_len);
386 
387   /// Get the memory cost of this object.
388   ///
389   /// Return the size in bytes that this object takes in memory. This returns
390   /// the size in bytes of this object, which does not include any the shared
391   /// string values it may refer to.
392   ///
393   /// \return
394   ///     The number of bytes that this object occupies in memory.
MemorySize()395   size_t MemorySize() const { return sizeof(ConstString); }
396 
397   struct MemoryStats {
GetBytesTotalMemoryStats398     size_t GetBytesTotal() const { return bytes_total; }
GetBytesUsedMemoryStats399     size_t GetBytesUsed() const { return bytes_used; }
GetBytesUnusedMemoryStats400     size_t GetBytesUnused() const { return bytes_total - bytes_used; }
401     size_t bytes_total = 0;
402     size_t bytes_used = 0;
403   };
404 
405   static MemoryStats GetMemoryStats();
406 
407 protected:
408   template <typename T, typename Enable> friend struct ::llvm::DenseMapInfo;
409   /// Only used by DenseMapInfo.
FromStringPoolPointer(const char * ptr)410   static ConstString FromStringPoolPointer(const char *ptr) {
411     ConstString s;
412     s.m_string = ptr;
413     return s;
414   };
415 
416   const char *m_string = nullptr;
417 };
418 
/// Stream the string value \a str to the stream \a s.
///
/// Declared here and defined out of line.
///
/// \param[in] s
///     The stream to write to.
///
/// \param[in] str
///     The string whose value is written.
///
/// \return
///     A reference to a Stream (presumably \a s itself, so that calls can be
///     chained -- confirm against the definition).
Stream &operator<<(Stream &s, ConstString str);
421 
422 } // namespace lldb_private
423 
424 namespace llvm {
/// Specialization of llvm::format_provider for lldb_private::ConstString.
///
/// Only the declaration of \c format lives in this header; its definition is
/// elsewhere. NOTE(review): presumably this is what lets a ConstString be
/// passed as an argument to llvm::formatv -- confirm.
template <> struct format_provider<lldb_private::ConstString> {
  /// Write \a CS to \a OS, given the format options string \a Options.
  static void format(const lldb_private::ConstString &CS, llvm::raw_ostream &OS,
                     llvm::StringRef Options);
};
429 
430 /// DenseMapInfo implementation.
431 /// \{
432 template <> struct DenseMapInfo<lldb_private::ConstString> {
433   static inline lldb_private::ConstString getEmptyKey() {
434     return lldb_private::ConstString::FromStringPoolPointer(
435         DenseMapInfo<const char *>::getEmptyKey());
436   }
437   static inline lldb_private::ConstString getTombstoneKey() {
438     return lldb_private::ConstString::FromStringPoolPointer(
439         DenseMapInfo<const char *>::getTombstoneKey());
440   }
441   static unsigned getHashValue(lldb_private::ConstString val) {
442     return DenseMapInfo<const char *>::getHashValue(val.m_string);
443   }
444   static bool isEqual(lldb_private::ConstString LHS,
445                       lldb_private::ConstString RHS) {
446     return LHS == RHS;
447   }
448 };
449 /// \}
450 
451 inline raw_ostream &operator<<(raw_ostream &os, lldb_private::ConstString s) {
452   os << s.GetStringRef();
453   return os;
454 }
455 } // namespace llvm
456 
457 #endif // LLDB_UTILITY_CONSTSTRING_H
458