xref: /aosp_15_r20/external/icing/icing/tokenization/language-segmenter-factory.h (revision 8b6cd535a057e39b3b86660c4aa06c99747c2136)
1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef ICING_TOKENIZATION_LANGUAGE_SEGMENTER_FACTORY_H_
16 #define ICING_TOKENIZATION_LANGUAGE_SEGMENTER_FACTORY_H_
17 
#include <memory>
#include <string>
#include <string_view>
#include <utility>

#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/jni/jni-cache.h"
#include "icing/tokenization/language-segmenter.h"
25 
26 namespace icing {
27 namespace lib {
28 
29 namespace language_segmenter_factory {
30 
31 struct SegmenterOptions {
32   explicit SegmenterOptions(std::string locale,
33                             const JniCache* jni_cache = nullptr,
34                             bool enable_icu_segmenter = false)
localeSegmenterOptions35       : locale(std::move(locale)),
36         jni_cache(jni_cache),
37         enable_icu_segmenter(enable_icu_segmenter) {}
38 
39   std::string locale;
40 
41   // Does not hold ownership.
42   const JniCache* jni_cache;
43 
44   // Determines whether to use an ICU based language segmenter
45   // in icu-with-reverse-jni-language-segmenter-factory or not.
46   // The default value is false, which means that the fallback option of a
47   // Reverse JNI based language segmenter will be used.
48   //
49   // This variable is a no-op for all other segmenter factories because they
50   // only support one segmenter type.
51   bool enable_icu_segmenter;
52 };
53 
54 // Creates a language segmenter with the given locale.
55 //
56 // Returns:
57 //   A LanguageSegmenter on success
58 //   INVALID_ARGUMENT_ERROR if locale string is invalid
59 libtextclassifier3::StatusOr<std::unique_ptr<LanguageSegmenter>> Create(
60     SegmenterOptions options);
61 
62 }  // namespace language_segmenter_factory
63 
64 }  // namespace lib
65 }  // namespace icing
66 
67 #endif  // ICING_TOKENIZATION_LANGUAGE_SEGMENTER_FACTORY_H_
68