xref: /aosp_15_r20/external/perfetto/src/trace_processor/importers/proto/string_encoding_utils.h (revision 6dbdd20afdafa5e3ca9b8809fa73465d530080dc)
1 
2 /*
3  * Copyright (C) 2024 The Android Open Source Project
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at
8  *
9  *      http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 #ifndef SRC_TRACE_PROCESSOR_IMPORTERS_PROTO_STRING_ENCODING_UTILS_H_
19 #define SRC_TRACE_PROCESSOR_IMPORTERS_PROTO_STRING_ENCODING_UTILS_H_
20 
21 #include <string>
22 
23 #include "perfetto/protozero/field.h"
24 
25 namespace perfetto {
26 namespace trace_processor {
27 
28 // Converts a byte stream that represents a Latin-1 (ISO/IEC 8859-1,
29 // https://en.wikipedia.org/wiki/ISO/IEC_8859-1) encoded string to a UTF-8
30 // (https://en.wikipedia.org/wiki/UTF-8) encoded std::string.
31 // This operation will never fail.
32 std::string ConvertLatin1ToUtf8(protozero::ConstBytes latin1);
33 
34 // Converts a byte stream that represents a UTF-16 Little Endian
35 // (https://en.wikipedia.org/wiki/UTF-16) encoded string to a UTF-8
36 // (https://en.wikipedia.org/wiki/UTF-8) encoded std::string.
37 //
38 // NOTE: UTF-16 code units that cannot be correctly parsed will be converted to
39 // the replacement character U+FFFD.
40 //
41 // ATTENTION: This function performs no special handling of special characters
42 // such as BOM (byte order mark). In particular this means that the caller is
43 // responsible for determining the right endianness and removing those
44 // characters if needed.
45 std::string ConvertUtf16LeToUtf8(protozero::ConstBytes utf16);
46 
47 // Converts a byte stream that represents a UTF-16 Big Endian
48 // (https://en.wikipedia.org/wiki/UTF-16) encoded string to a UTF-8
49 // (https://en.wikipedia.org/wiki/UTF-8) encoded std::string.
50 //
51 // NOTE: UTF-16 code units that cannot be correctly parsed will be converted to
52 // the replacement character U+FFFD.
53 //
54 // ATTENTION: This function performs no special handling of special characters
55 // such as BOM (byte order mark). In particular this means that the caller is
56 // responsible for determining the right endianness and removing any special
57 // characters if needed.
58 std::string ConvertUtf16BeToUtf8(protozero::ConstBytes utf16);
59 
60 }  // namespace trace_processor
61 }  // namespace perfetto
62 
63 #endif  // SRC_TRACE_PROCESSOR_IMPORTERS_PROTO_STRING_ENCODING_UTILS_H_
64