xref: /aosp_15_r20/external/libchrome/base/i18n/encoding_detection.cc (revision 635a864187cb8b6c713ff48b7e790a6b21769273)
1*635a8641SAndroid Build Coastguard Worker // Copyright 2016 The Chromium Authors. All rights reserved.
2*635a8641SAndroid Build Coastguard Worker // Use of this source code is governed by a BSD-style license that can be
3*635a8641SAndroid Build Coastguard Worker // found in the LICENSE file.
4*635a8641SAndroid Build Coastguard Worker 
5*635a8641SAndroid Build Coastguard Worker #include "base/i18n/encoding_detection.h"
6*635a8641SAndroid Build Coastguard Worker 
7*635a8641SAndroid Build Coastguard Worker #include "build/build_config.h"
8*635a8641SAndroid Build Coastguard Worker #include "third_party/ced/src/compact_enc_det/compact_enc_det.h"
9*635a8641SAndroid Build Coastguard Worker 
10*635a8641SAndroid Build Coastguard Worker // third_party/ced/src/util/encodings/encodings.h, which is included
11*635a8641SAndroid Build Coastguard Worker // by the include above, undefs UNICODE because that is a macro used
12*635a8641SAndroid Build Coastguard Worker // internally in ced. If we later in the same translation unit do
13*635a8641SAndroid Build Coastguard Worker // anything related to Windows or Windows headers those will then use
14*635a8641SAndroid Build Coastguard Worker // the ASCII versions which we do not want. To avoid that happening in
15*635a8641SAndroid Build Coastguard Worker // jumbo builds, we redefine UNICODE again here.
16*635a8641SAndroid Build Coastguard Worker #if defined(OS_WIN)
17*635a8641SAndroid Build Coastguard Worker #define UNICODE 1
18*635a8641SAndroid Build Coastguard Worker #endif  // OS_WIN
19*635a8641SAndroid Build Coastguard Worker 
20*635a8641SAndroid Build Coastguard Worker namespace base {
21*635a8641SAndroid Build Coastguard Worker 
DetectEncoding(const std::string & text,std::string * encoding)22*635a8641SAndroid Build Coastguard Worker bool DetectEncoding(const std::string& text, std::string* encoding) {
23*635a8641SAndroid Build Coastguard Worker   int consumed_bytes;
24*635a8641SAndroid Build Coastguard Worker   bool is_reliable;
25*635a8641SAndroid Build Coastguard Worker   Encoding enc = CompactEncDet::DetectEncoding(
26*635a8641SAndroid Build Coastguard Worker       text.c_str(), text.length(), nullptr, nullptr, nullptr,
27*635a8641SAndroid Build Coastguard Worker       UNKNOWN_ENCODING,
28*635a8641SAndroid Build Coastguard Worker       UNKNOWN_LANGUAGE,
29*635a8641SAndroid Build Coastguard Worker       CompactEncDet::QUERY_CORPUS,  // plain text
30*635a8641SAndroid Build Coastguard Worker       false,  // Include 7-bit encodings
31*635a8641SAndroid Build Coastguard Worker       &consumed_bytes,
32*635a8641SAndroid Build Coastguard Worker       &is_reliable);
33*635a8641SAndroid Build Coastguard Worker 
34*635a8641SAndroid Build Coastguard Worker   if (enc == UNKNOWN_ENCODING)
35*635a8641SAndroid Build Coastguard Worker     return false;
36*635a8641SAndroid Build Coastguard Worker 
37*635a8641SAndroid Build Coastguard Worker   *encoding = MimeEncodingName(enc);
38*635a8641SAndroid Build Coastguard Worker   return true;
39*635a8641SAndroid Build Coastguard Worker }
40*635a8641SAndroid Build Coastguard Worker }  // namespace base
41