xref: /aosp_15_r20/external/cronet/base/i18n/encoding_detection.cc (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1*6777b538SAndroid Build Coastguard Worker // Copyright 2016 The Chromium Authors
2*6777b538SAndroid Build Coastguard Worker // Use of this source code is governed by a BSD-style license that can be
3*6777b538SAndroid Build Coastguard Worker // found in the LICENSE file.
4*6777b538SAndroid Build Coastguard Worker 
#include "base/i18n/encoding_detection.h"

#include <limits>

#include "build/build_config.h"
#include "third_party/ced/src/compact_enc_det/compact_enc_det.h"
9*6777b538SAndroid Build Coastguard Worker 
10*6777b538SAndroid Build Coastguard Worker // third_party/ced/src/util/encodings/encodings.h, which is included
11*6777b538SAndroid Build Coastguard Worker // by the include above, undefs UNICODE because that is a macro used
12*6777b538SAndroid Build Coastguard Worker // internally in ced. If we later in the same translation unit do
13*6777b538SAndroid Build Coastguard Worker // anything related to Windows or Windows headers those will then use
14*6777b538SAndroid Build Coastguard Worker // the ASCII versions which we do not want. To avoid that happening in
15*6777b538SAndroid Build Coastguard Worker // jumbo builds, we redefine UNICODE again here.
16*6777b538SAndroid Build Coastguard Worker #if BUILDFLAG(IS_WIN)
17*6777b538SAndroid Build Coastguard Worker #define UNICODE 1
18*6777b538SAndroid Build Coastguard Worker #endif  // BUILDFLAG(IS_WIN)
19*6777b538SAndroid Build Coastguard Worker 
20*6777b538SAndroid Build Coastguard Worker namespace base {
21*6777b538SAndroid Build Coastguard Worker 
DetectEncoding(const std::string & text,std::string * encoding)22*6777b538SAndroid Build Coastguard Worker bool DetectEncoding(const std::string& text, std::string* encoding) {
23*6777b538SAndroid Build Coastguard Worker   int consumed_bytes;
24*6777b538SAndroid Build Coastguard Worker   bool is_reliable;
25*6777b538SAndroid Build Coastguard Worker   Encoding enc = CompactEncDet::DetectEncoding(
26*6777b538SAndroid Build Coastguard Worker       text.c_str(), text.length(), nullptr, nullptr, nullptr,
27*6777b538SAndroid Build Coastguard Worker       UNKNOWN_ENCODING,
28*6777b538SAndroid Build Coastguard Worker       UNKNOWN_LANGUAGE,
29*6777b538SAndroid Build Coastguard Worker       CompactEncDet::QUERY_CORPUS,  // plain text
30*6777b538SAndroid Build Coastguard Worker       false,  // Include 7-bit encodings
31*6777b538SAndroid Build Coastguard Worker       &consumed_bytes,
32*6777b538SAndroid Build Coastguard Worker       &is_reliable);
33*6777b538SAndroid Build Coastguard Worker 
34*6777b538SAndroid Build Coastguard Worker   if (enc == UNKNOWN_ENCODING)
35*6777b538SAndroid Build Coastguard Worker     return false;
36*6777b538SAndroid Build Coastguard Worker 
37*6777b538SAndroid Build Coastguard Worker   *encoding = MimeEncodingName(enc);
38*6777b538SAndroid Build Coastguard Worker   return true;
39*6777b538SAndroid Build Coastguard Worker }
40*6777b538SAndroid Build Coastguard Worker }  // namespace base
41