xref: /aosp_15_r20/external/icing/icing/tokenization/tokenizer-factory.h (revision 8b6cd535a057e39b3b86660c4aa06c99747c2136)
1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef ICING_TOKENIZATION_TOKENIZER_FACTORY_H_
16 #define ICING_TOKENIZATION_TOKENIZER_FACTORY_H_
17 
18 #include <memory>
19 
20 #include "icing/text_classifier/lib3/utils/base/statusor.h"
21 #include "icing/proto/schema.pb.h"
22 #include "icing/tokenization/language-segmenter.h"
23 #include "icing/tokenization/tokenizer.h"
24 
25 namespace icing {
26 namespace lib {
27 
28 namespace tokenizer_factory {
29 
30 // Factory function that creates the indexing Tokenizer selected by `type`.
31 // It does not take ownership of `lang_segmenter`, which must point to a valid
32 // object that outlives the returned Tokenizer (owned by the caller).
33 //
34 // Returns:
35 //   A tokenizer on success
36 //   FAILED_PRECONDITION if `lang_segmenter` is null
37 //   INVALID_ARGUMENT if `type` is not a valid tokenizer type
38 libtextclassifier3::StatusOr<std::unique_ptr<Tokenizer>>
39 CreateIndexingTokenizer(StringIndexingConfig::TokenizerType::Code type,
40                         const LanguageSegmenter* lang_segmenter);
41 
42 }  // namespace tokenizer_factory
43 
44 }  // namespace lib
45 }  // namespace icing
46 
47 #endif  // ICING_TOKENIZATION_TOKENIZER_FACTORY_H_
48