xref: /aosp_15_r20/external/libtextclassifier/native/utils/utf8/unilib-javaicu.cc (revision 993b0882672172b81d12fad7a7ac0c3e5c824a12)
1*993b0882SAndroid Build Coastguard Worker /*
2*993b0882SAndroid Build Coastguard Worker  * Copyright (C) 2018 The Android Open Source Project
3*993b0882SAndroid Build Coastguard Worker  *
4*993b0882SAndroid Build Coastguard Worker  * Licensed under the Apache License, Version 2.0 (the "License");
5*993b0882SAndroid Build Coastguard Worker  * you may not use this file except in compliance with the License.
6*993b0882SAndroid Build Coastguard Worker  * You may obtain a copy of the License at
7*993b0882SAndroid Build Coastguard Worker  *
8*993b0882SAndroid Build Coastguard Worker  *      http://www.apache.org/licenses/LICENSE-2.0
9*993b0882SAndroid Build Coastguard Worker  *
10*993b0882SAndroid Build Coastguard Worker  * Unless required by applicable law or agreed to in writing, software
11*993b0882SAndroid Build Coastguard Worker  * distributed under the License is distributed on an "AS IS" BASIS,
12*993b0882SAndroid Build Coastguard Worker  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13*993b0882SAndroid Build Coastguard Worker  * See the License for the specific language governing permissions and
14*993b0882SAndroid Build Coastguard Worker  * limitations under the License.
15*993b0882SAndroid Build Coastguard Worker  */
16*993b0882SAndroid Build Coastguard Worker 
17*993b0882SAndroid Build Coastguard Worker #include "utils/utf8/unilib-javaicu.h"
18*993b0882SAndroid Build Coastguard Worker 
19*993b0882SAndroid Build Coastguard Worker #include <math.h>
20*993b0882SAndroid Build Coastguard Worker 
21*993b0882SAndroid Build Coastguard Worker #include <cassert>
22*993b0882SAndroid Build Coastguard Worker #include <cctype>
23*993b0882SAndroid Build Coastguard Worker #include <map>
24*993b0882SAndroid Build Coastguard Worker 
25*993b0882SAndroid Build Coastguard Worker #include "utils/base/logging.h"
26*993b0882SAndroid Build Coastguard Worker #include "utils/base/statusor.h"
27*993b0882SAndroid Build Coastguard Worker #include "utils/java/jni-base.h"
28*993b0882SAndroid Build Coastguard Worker #include "utils/java/jni-helper.h"
29*993b0882SAndroid Build Coastguard Worker #include "utils/utf8/unicodetext.h"
30*993b0882SAndroid Build Coastguard Worker 
31*993b0882SAndroid Build Coastguard Worker namespace libtextclassifier3 {
32*993b0882SAndroid Build Coastguard Worker 
UniLibBase()33*993b0882SAndroid Build Coastguard Worker UniLibBase::UniLibBase() {
34*993b0882SAndroid Build Coastguard Worker   TC3_LOG(FATAL) << "Java ICU UniLib must be initialized with a JniCache.";
35*993b0882SAndroid Build Coastguard Worker }
36*993b0882SAndroid Build Coastguard Worker 
UniLibBase(const std::shared_ptr<JniCache> & jni_cache)37*993b0882SAndroid Build Coastguard Worker UniLibBase::UniLibBase(const std::shared_ptr<JniCache>& jni_cache)
38*993b0882SAndroid Build Coastguard Worker     : jni_cache_(jni_cache) {}
39*993b0882SAndroid Build Coastguard Worker 
IsOpeningBracket(char32 codepoint) const40*993b0882SAndroid Build Coastguard Worker bool UniLibBase::IsOpeningBracket(char32 codepoint) const {
41*993b0882SAndroid Build Coastguard Worker   return libtextclassifier3::IsOpeningBracket(codepoint);
42*993b0882SAndroid Build Coastguard Worker }
43*993b0882SAndroid Build Coastguard Worker 
IsClosingBracket(char32 codepoint) const44*993b0882SAndroid Build Coastguard Worker bool UniLibBase::IsClosingBracket(char32 codepoint) const {
45*993b0882SAndroid Build Coastguard Worker   return libtextclassifier3::IsClosingBracket(codepoint);
46*993b0882SAndroid Build Coastguard Worker }
47*993b0882SAndroid Build Coastguard Worker 
IsWhitespace(char32 codepoint) const48*993b0882SAndroid Build Coastguard Worker bool UniLibBase::IsWhitespace(char32 codepoint) const {
49*993b0882SAndroid Build Coastguard Worker   return libtextclassifier3::IsWhitespace(codepoint);
50*993b0882SAndroid Build Coastguard Worker }
51*993b0882SAndroid Build Coastguard Worker 
IsDigit(char32 codepoint) const52*993b0882SAndroid Build Coastguard Worker bool UniLibBase::IsDigit(char32 codepoint) const {
53*993b0882SAndroid Build Coastguard Worker   return libtextclassifier3::IsDigit(codepoint);
54*993b0882SAndroid Build Coastguard Worker }
55*993b0882SAndroid Build Coastguard Worker 
IsLower(char32 codepoint) const56*993b0882SAndroid Build Coastguard Worker bool UniLibBase::IsLower(char32 codepoint) const {
57*993b0882SAndroid Build Coastguard Worker   return libtextclassifier3::IsLower(codepoint);
58*993b0882SAndroid Build Coastguard Worker }
59*993b0882SAndroid Build Coastguard Worker 
IsUpper(char32 codepoint) const60*993b0882SAndroid Build Coastguard Worker bool UniLibBase::IsUpper(char32 codepoint) const {
61*993b0882SAndroid Build Coastguard Worker   return libtextclassifier3::IsUpper(codepoint);
62*993b0882SAndroid Build Coastguard Worker }
63*993b0882SAndroid Build Coastguard Worker 
IsPunctuation(char32 codepoint) const64*993b0882SAndroid Build Coastguard Worker bool UniLibBase::IsPunctuation(char32 codepoint) const {
65*993b0882SAndroid Build Coastguard Worker   return libtextclassifier3::IsPunctuation(codepoint);
66*993b0882SAndroid Build Coastguard Worker }
67*993b0882SAndroid Build Coastguard Worker 
ToLower(char32 codepoint) const68*993b0882SAndroid Build Coastguard Worker char32 UniLibBase::ToLower(char32 codepoint) const {
69*993b0882SAndroid Build Coastguard Worker   return libtextclassifier3::ToLower(codepoint);
70*993b0882SAndroid Build Coastguard Worker }
71*993b0882SAndroid Build Coastguard Worker 
ToUpper(char32 codepoint) const72*993b0882SAndroid Build Coastguard Worker char32 UniLibBase::ToUpper(char32 codepoint) const {
73*993b0882SAndroid Build Coastguard Worker   return libtextclassifier3::ToUpper(codepoint);
74*993b0882SAndroid Build Coastguard Worker }
75*993b0882SAndroid Build Coastguard Worker 
GetPairedBracket(char32 codepoint) const76*993b0882SAndroid Build Coastguard Worker char32 UniLibBase::GetPairedBracket(char32 codepoint) const {
77*993b0882SAndroid Build Coastguard Worker   return libtextclassifier3::GetPairedBracket(codepoint);
78*993b0882SAndroid Build Coastguard Worker }
79*993b0882SAndroid Build Coastguard Worker 
80*993b0882SAndroid Build Coastguard Worker // -----------------------------------------------------------------------------
81*993b0882SAndroid Build Coastguard Worker // Implementations that call out to JVM. Behold the beauty.
82*993b0882SAndroid Build Coastguard Worker // -----------------------------------------------------------------------------
83*993b0882SAndroid Build Coastguard Worker 
Length(const UnicodeText & text) const84*993b0882SAndroid Build Coastguard Worker StatusOr<int32> UniLibBase::Length(const UnicodeText& text) const {
85*993b0882SAndroid Build Coastguard Worker   TC3_ASSIGN_OR_RETURN(ScopedLocalRef<jstring> text_java,
86*993b0882SAndroid Build Coastguard Worker                        jni_cache_->ConvertToJavaString(text));
87*993b0882SAndroid Build Coastguard Worker 
88*993b0882SAndroid Build Coastguard Worker   JNIEnv* jenv = jni_cache_->GetEnv();
89*993b0882SAndroid Build Coastguard Worker   TC3_ASSIGN_OR_RETURN(int utf16_length,
90*993b0882SAndroid Build Coastguard Worker                        JniHelper::CallIntMethod(jenv, text_java.get(),
91*993b0882SAndroid Build Coastguard Worker                                                 jni_cache_->string_length));
92*993b0882SAndroid Build Coastguard Worker 
93*993b0882SAndroid Build Coastguard Worker   return JniHelper::CallIntMethod(jenv, text_java.get(),
94*993b0882SAndroid Build Coastguard Worker                                   jni_cache_->string_code_point_count, 0,
95*993b0882SAndroid Build Coastguard Worker                                   utf16_length);
96*993b0882SAndroid Build Coastguard Worker }
97*993b0882SAndroid Build Coastguard Worker 
ParseInt32(const UnicodeText & text,int32 * result) const98*993b0882SAndroid Build Coastguard Worker bool UniLibBase::ParseInt32(const UnicodeText& text, int32* result) const {
99*993b0882SAndroid Build Coastguard Worker   return ParseInt(text, result);
100*993b0882SAndroid Build Coastguard Worker }
101*993b0882SAndroid Build Coastguard Worker 
ParseInt64(const UnicodeText & text,int64 * result) const102*993b0882SAndroid Build Coastguard Worker bool UniLibBase::ParseInt64(const UnicodeText& text, int64* result) const {
103*993b0882SAndroid Build Coastguard Worker   return ParseInt(text, result);
104*993b0882SAndroid Build Coastguard Worker }
105*993b0882SAndroid Build Coastguard Worker 
ParseDouble(const UnicodeText & text,double * result) const106*993b0882SAndroid Build Coastguard Worker bool UniLibBase::ParseDouble(const UnicodeText& text, double* result) const {
107*993b0882SAndroid Build Coastguard Worker   if (!jni_cache_) {
108*993b0882SAndroid Build Coastguard Worker     return false;
109*993b0882SAndroid Build Coastguard Worker   }
110*993b0882SAndroid Build Coastguard Worker 
111*993b0882SAndroid Build Coastguard Worker   auto it_dot = text.begin();
112*993b0882SAndroid Build Coastguard Worker   for (; it_dot != text.end() && !IsDot(*it_dot); it_dot++) {
113*993b0882SAndroid Build Coastguard Worker   }
114*993b0882SAndroid Build Coastguard Worker 
115*993b0882SAndroid Build Coastguard Worker   int32 integer_part;
116*993b0882SAndroid Build Coastguard Worker   if (!ParseInt(UnicodeText::Substring(text.begin(), it_dot, /*do_copy=*/false),
117*993b0882SAndroid Build Coastguard Worker                 &integer_part)) {
118*993b0882SAndroid Build Coastguard Worker     return false;
119*993b0882SAndroid Build Coastguard Worker   }
120*993b0882SAndroid Build Coastguard Worker 
121*993b0882SAndroid Build Coastguard Worker   int32 fractional_part = 0;
122*993b0882SAndroid Build Coastguard Worker   if (it_dot != text.end()) {
123*993b0882SAndroid Build Coastguard Worker     if (!ParseInt(
124*993b0882SAndroid Build Coastguard Worker             UnicodeText::Substring(++it_dot, text.end(), /*do_copy=*/false),
125*993b0882SAndroid Build Coastguard Worker             &fractional_part)) {
126*993b0882SAndroid Build Coastguard Worker       return false;
127*993b0882SAndroid Build Coastguard Worker     }
128*993b0882SAndroid Build Coastguard Worker   }
129*993b0882SAndroid Build Coastguard Worker 
130*993b0882SAndroid Build Coastguard Worker   double factional_part_double = fractional_part;
131*993b0882SAndroid Build Coastguard Worker   while (factional_part_double >= 1) {
132*993b0882SAndroid Build Coastguard Worker     factional_part_double /= 10;
133*993b0882SAndroid Build Coastguard Worker   }
134*993b0882SAndroid Build Coastguard Worker   *result = integer_part + factional_part_double;
135*993b0882SAndroid Build Coastguard Worker 
136*993b0882SAndroid Build Coastguard Worker   return true;
137*993b0882SAndroid Build Coastguard Worker }
138*993b0882SAndroid Build Coastguard Worker 
CreateRegexPattern(const UnicodeText & regex) const139*993b0882SAndroid Build Coastguard Worker std::unique_ptr<UniLibBase::RegexPattern> UniLibBase::CreateRegexPattern(
140*993b0882SAndroid Build Coastguard Worker     const UnicodeText& regex) const {
141*993b0882SAndroid Build Coastguard Worker   return std::unique_ptr<UniLibBase::RegexPattern>(
142*993b0882SAndroid Build Coastguard Worker       new UniLibBase::RegexPattern(jni_cache_.get(), regex, /*lazy=*/false));
143*993b0882SAndroid Build Coastguard Worker }
144*993b0882SAndroid Build Coastguard Worker 
CreateLazyRegexPattern(const UnicodeText & regex) const145*993b0882SAndroid Build Coastguard Worker std::unique_ptr<UniLibBase::RegexPattern> UniLibBase::CreateLazyRegexPattern(
146*993b0882SAndroid Build Coastguard Worker     const UnicodeText& regex) const {
147*993b0882SAndroid Build Coastguard Worker   return std::unique_ptr<UniLibBase::RegexPattern>(
148*993b0882SAndroid Build Coastguard Worker       new UniLibBase::RegexPattern(jni_cache_.get(), regex, /*lazy=*/true));
149*993b0882SAndroid Build Coastguard Worker }
150*993b0882SAndroid Build Coastguard Worker 
RegexPattern(const JniCache * jni_cache,const UnicodeText & pattern,bool lazy)151*993b0882SAndroid Build Coastguard Worker UniLibBase::RegexPattern::RegexPattern(const JniCache* jni_cache,
152*993b0882SAndroid Build Coastguard Worker                                        const UnicodeText& pattern, bool lazy)
153*993b0882SAndroid Build Coastguard Worker     : jni_cache_(jni_cache),
154*993b0882SAndroid Build Coastguard Worker       pattern_(nullptr, jni_cache ? jni_cache->jvm : nullptr),
155*993b0882SAndroid Build Coastguard Worker       initialized_(false),
156*993b0882SAndroid Build Coastguard Worker       initialization_failure_(false),
157*993b0882SAndroid Build Coastguard Worker       pattern_text_(pattern) {
158*993b0882SAndroid Build Coastguard Worker   if (!lazy) {
159*993b0882SAndroid Build Coastguard Worker     LockedInitializeIfNotAlready();
160*993b0882SAndroid Build Coastguard Worker   }
161*993b0882SAndroid Build Coastguard Worker }
162*993b0882SAndroid Build Coastguard Worker 
LockedInitializeIfNotAlready() const163*993b0882SAndroid Build Coastguard Worker Status UniLibBase::RegexPattern::LockedInitializeIfNotAlready() const {
164*993b0882SAndroid Build Coastguard Worker   std::lock_guard<std::mutex> guard(mutex_);
165*993b0882SAndroid Build Coastguard Worker   if (initialized_ || initialization_failure_) {
166*993b0882SAndroid Build Coastguard Worker     return Status::OK;
167*993b0882SAndroid Build Coastguard Worker   }
168*993b0882SAndroid Build Coastguard Worker 
169*993b0882SAndroid Build Coastguard Worker   if (jni_cache_) {
170*993b0882SAndroid Build Coastguard Worker     JNIEnv* jenv = jni_cache_->GetEnv();
171*993b0882SAndroid Build Coastguard Worker     initialization_failure_ = true;
172*993b0882SAndroid Build Coastguard Worker     TC3_ASSIGN_OR_RETURN(ScopedLocalRef<jstring> regex_java,
173*993b0882SAndroid Build Coastguard Worker                          jni_cache_->ConvertToJavaString(pattern_text_));
174*993b0882SAndroid Build Coastguard Worker     TC3_ASSIGN_OR_RETURN(ScopedLocalRef<jobject> pattern,
175*993b0882SAndroid Build Coastguard Worker                          JniHelper::CallStaticObjectMethod(
176*993b0882SAndroid Build Coastguard Worker                              jenv, jni_cache_->pattern_class.get(),
177*993b0882SAndroid Build Coastguard Worker                              jni_cache_->pattern_compile, regex_java.get()));
178*993b0882SAndroid Build Coastguard Worker     pattern_ = MakeGlobalRef(pattern.get(), jenv, jni_cache_->jvm);
179*993b0882SAndroid Build Coastguard Worker     if (pattern_ == nullptr) {
180*993b0882SAndroid Build Coastguard Worker       return Status::UNKNOWN;
181*993b0882SAndroid Build Coastguard Worker     }
182*993b0882SAndroid Build Coastguard Worker 
183*993b0882SAndroid Build Coastguard Worker     initialization_failure_ = false;
184*993b0882SAndroid Build Coastguard Worker     initialized_ = true;
185*993b0882SAndroid Build Coastguard Worker     pattern_text_.clear();  // We don't need this anymore.
186*993b0882SAndroid Build Coastguard Worker   }
187*993b0882SAndroid Build Coastguard Worker   return Status::OK;
188*993b0882SAndroid Build Coastguard Worker }
189*993b0882SAndroid Build Coastguard Worker 
190*993b0882SAndroid Build Coastguard Worker constexpr int UniLibBase::RegexMatcher::kError;
191*993b0882SAndroid Build Coastguard Worker constexpr int UniLibBase::RegexMatcher::kNoError;
192*993b0882SAndroid Build Coastguard Worker 
Matcher(const UnicodeText & context) const193*993b0882SAndroid Build Coastguard Worker std::unique_ptr<UniLibBase::RegexMatcher> UniLibBase::RegexPattern::Matcher(
194*993b0882SAndroid Build Coastguard Worker     const UnicodeText& context) const {
195*993b0882SAndroid Build Coastguard Worker   LockedInitializeIfNotAlready();  // Possibly lazy initialization.
196*993b0882SAndroid Build Coastguard Worker   if (initialization_failure_) {
197*993b0882SAndroid Build Coastguard Worker     return nullptr;
198*993b0882SAndroid Build Coastguard Worker   }
199*993b0882SAndroid Build Coastguard Worker 
200*993b0882SAndroid Build Coastguard Worker   if (jni_cache_) {
201*993b0882SAndroid Build Coastguard Worker     JNIEnv* env = jni_cache_->GetEnv();
202*993b0882SAndroid Build Coastguard Worker     const StatusOr<ScopedLocalRef<jstring>> status_or_context_java =
203*993b0882SAndroid Build Coastguard Worker         jni_cache_->ConvertToJavaString(context);
204*993b0882SAndroid Build Coastguard Worker     if (!status_or_context_java.ok() || !status_or_context_java.ValueOrDie()) {
205*993b0882SAndroid Build Coastguard Worker       return nullptr;
206*993b0882SAndroid Build Coastguard Worker     }
207*993b0882SAndroid Build Coastguard Worker     const StatusOr<ScopedLocalRef<jobject>> status_or_matcher =
208*993b0882SAndroid Build Coastguard Worker         JniHelper::CallObjectMethod(env, pattern_.get(),
209*993b0882SAndroid Build Coastguard Worker                                     jni_cache_->pattern_matcher,
210*993b0882SAndroid Build Coastguard Worker                                     status_or_context_java.ValueOrDie().get());
211*993b0882SAndroid Build Coastguard Worker     if (jni_cache_->ExceptionCheckAndClear() || !status_or_matcher.ok() ||
212*993b0882SAndroid Build Coastguard Worker         !status_or_matcher.ValueOrDie()) {
213*993b0882SAndroid Build Coastguard Worker       return nullptr;
214*993b0882SAndroid Build Coastguard Worker     }
215*993b0882SAndroid Build Coastguard Worker     return std::unique_ptr<UniLibBase::RegexMatcher>(new RegexMatcher(
216*993b0882SAndroid Build Coastguard Worker         jni_cache_,
217*993b0882SAndroid Build Coastguard Worker         MakeGlobalRef(status_or_matcher.ValueOrDie().get(), env,
218*993b0882SAndroid Build Coastguard Worker                       jni_cache_->jvm),
219*993b0882SAndroid Build Coastguard Worker         MakeGlobalRef(status_or_context_java.ValueOrDie().get(), env,
220*993b0882SAndroid Build Coastguard Worker                       jni_cache_->jvm)));
221*993b0882SAndroid Build Coastguard Worker   } else {
222*993b0882SAndroid Build Coastguard Worker     // NOTE: A valid object needs to be created here to pass the interface
223*993b0882SAndroid Build Coastguard Worker     // tests.
224*993b0882SAndroid Build Coastguard Worker     return std::unique_ptr<UniLibBase::RegexMatcher>(
225*993b0882SAndroid Build Coastguard Worker         new RegexMatcher(jni_cache_, {}, {}));
226*993b0882SAndroid Build Coastguard Worker   }
227*993b0882SAndroid Build Coastguard Worker }
228*993b0882SAndroid Build Coastguard Worker 
RegexMatcher(const JniCache * jni_cache,ScopedGlobalRef<jobject> matcher,ScopedGlobalRef<jstring> text)229*993b0882SAndroid Build Coastguard Worker UniLibBase::RegexMatcher::RegexMatcher(const JniCache* jni_cache,
230*993b0882SAndroid Build Coastguard Worker                                        ScopedGlobalRef<jobject> matcher,
231*993b0882SAndroid Build Coastguard Worker                                        ScopedGlobalRef<jstring> text)
232*993b0882SAndroid Build Coastguard Worker     : jni_cache_(jni_cache),
233*993b0882SAndroid Build Coastguard Worker       matcher_(std::move(matcher)),
234*993b0882SAndroid Build Coastguard Worker       text_(std::move(text)) {}
235*993b0882SAndroid Build Coastguard Worker 
Matches(int * status) const236*993b0882SAndroid Build Coastguard Worker bool UniLibBase::RegexMatcher::Matches(int* status) const {
237*993b0882SAndroid Build Coastguard Worker   if (jni_cache_) {
238*993b0882SAndroid Build Coastguard Worker     *status = kNoError;
239*993b0882SAndroid Build Coastguard Worker     const bool result = jni_cache_->GetEnv()->CallBooleanMethod(
240*993b0882SAndroid Build Coastguard Worker         matcher_.get(), jni_cache_->matcher_matches);
241*993b0882SAndroid Build Coastguard Worker     if (jni_cache_->ExceptionCheckAndClear()) {
242*993b0882SAndroid Build Coastguard Worker       *status = kError;
243*993b0882SAndroid Build Coastguard Worker       return false;
244*993b0882SAndroid Build Coastguard Worker     }
245*993b0882SAndroid Build Coastguard Worker     return result;
246*993b0882SAndroid Build Coastguard Worker   } else {
247*993b0882SAndroid Build Coastguard Worker     *status = kError;
248*993b0882SAndroid Build Coastguard Worker     return false;
249*993b0882SAndroid Build Coastguard Worker   }
250*993b0882SAndroid Build Coastguard Worker }
251*993b0882SAndroid Build Coastguard Worker 
ApproximatelyMatches(int * status)252*993b0882SAndroid Build Coastguard Worker bool UniLibBase::RegexMatcher::ApproximatelyMatches(int* status) {
253*993b0882SAndroid Build Coastguard Worker   *status = kNoError;
254*993b0882SAndroid Build Coastguard Worker 
255*993b0882SAndroid Build Coastguard Worker   jni_cache_->GetEnv()->CallObjectMethod(matcher_.get(),
256*993b0882SAndroid Build Coastguard Worker                                          jni_cache_->matcher_reset);
257*993b0882SAndroid Build Coastguard Worker   if (jni_cache_->ExceptionCheckAndClear()) {
258*993b0882SAndroid Build Coastguard Worker     *status = kError;
259*993b0882SAndroid Build Coastguard Worker     return kError;
260*993b0882SAndroid Build Coastguard Worker   }
261*993b0882SAndroid Build Coastguard Worker 
262*993b0882SAndroid Build Coastguard Worker   if (!Find(status) || *status != kNoError) {
263*993b0882SAndroid Build Coastguard Worker     return false;
264*993b0882SAndroid Build Coastguard Worker   }
265*993b0882SAndroid Build Coastguard Worker 
266*993b0882SAndroid Build Coastguard Worker   const int found_start = jni_cache_->GetEnv()->CallIntMethod(
267*993b0882SAndroid Build Coastguard Worker       matcher_.get(), jni_cache_->matcher_start_idx, 0);
268*993b0882SAndroid Build Coastguard Worker   if (jni_cache_->ExceptionCheckAndClear()) {
269*993b0882SAndroid Build Coastguard Worker     *status = kError;
270*993b0882SAndroid Build Coastguard Worker     return kError;
271*993b0882SAndroid Build Coastguard Worker   }
272*993b0882SAndroid Build Coastguard Worker 
273*993b0882SAndroid Build Coastguard Worker   const int found_end = jni_cache_->GetEnv()->CallIntMethod(
274*993b0882SAndroid Build Coastguard Worker       matcher_.get(), jni_cache_->matcher_end_idx, 0);
275*993b0882SAndroid Build Coastguard Worker   if (jni_cache_->ExceptionCheckAndClear()) {
276*993b0882SAndroid Build Coastguard Worker     *status = kError;
277*993b0882SAndroid Build Coastguard Worker     return kError;
278*993b0882SAndroid Build Coastguard Worker   }
279*993b0882SAndroid Build Coastguard Worker 
280*993b0882SAndroid Build Coastguard Worker   int context_length_bmp = jni_cache_->GetEnv()->CallIntMethod(
281*993b0882SAndroid Build Coastguard Worker       text_.get(), jni_cache_->string_length);
282*993b0882SAndroid Build Coastguard Worker   if (jni_cache_->ExceptionCheckAndClear()) {
283*993b0882SAndroid Build Coastguard Worker     *status = kError;
284*993b0882SAndroid Build Coastguard Worker     return false;
285*993b0882SAndroid Build Coastguard Worker   }
286*993b0882SAndroid Build Coastguard Worker 
287*993b0882SAndroid Build Coastguard Worker   if (found_start != 0 || found_end != context_length_bmp) {
288*993b0882SAndroid Build Coastguard Worker     return false;
289*993b0882SAndroid Build Coastguard Worker   }
290*993b0882SAndroid Build Coastguard Worker 
291*993b0882SAndroid Build Coastguard Worker   return true;
292*993b0882SAndroid Build Coastguard Worker }
293*993b0882SAndroid Build Coastguard Worker 
UpdateLastFindOffset() const294*993b0882SAndroid Build Coastguard Worker bool UniLibBase::RegexMatcher::UpdateLastFindOffset() const {
295*993b0882SAndroid Build Coastguard Worker   if (!last_find_offset_dirty_) {
296*993b0882SAndroid Build Coastguard Worker     return true;
297*993b0882SAndroid Build Coastguard Worker   }
298*993b0882SAndroid Build Coastguard Worker 
299*993b0882SAndroid Build Coastguard Worker   const int find_offset = jni_cache_->GetEnv()->CallIntMethod(
300*993b0882SAndroid Build Coastguard Worker       matcher_.get(), jni_cache_->matcher_start_idx, 0);
301*993b0882SAndroid Build Coastguard Worker   if (jni_cache_->ExceptionCheckAndClear()) {
302*993b0882SAndroid Build Coastguard Worker     return false;
303*993b0882SAndroid Build Coastguard Worker   }
304*993b0882SAndroid Build Coastguard Worker 
305*993b0882SAndroid Build Coastguard Worker   const int codepoint_count = jni_cache_->GetEnv()->CallIntMethod(
306*993b0882SAndroid Build Coastguard Worker       text_.get(), jni_cache_->string_code_point_count, last_find_offset_,
307*993b0882SAndroid Build Coastguard Worker       find_offset);
308*993b0882SAndroid Build Coastguard Worker   if (jni_cache_->ExceptionCheckAndClear()) {
309*993b0882SAndroid Build Coastguard Worker     return false;
310*993b0882SAndroid Build Coastguard Worker   }
311*993b0882SAndroid Build Coastguard Worker 
312*993b0882SAndroid Build Coastguard Worker   last_find_offset_codepoints_ += codepoint_count;
313*993b0882SAndroid Build Coastguard Worker   last_find_offset_ = find_offset;
314*993b0882SAndroid Build Coastguard Worker   last_find_offset_dirty_ = false;
315*993b0882SAndroid Build Coastguard Worker 
316*993b0882SAndroid Build Coastguard Worker   return true;
317*993b0882SAndroid Build Coastguard Worker }
318*993b0882SAndroid Build Coastguard Worker 
Find(int * status)319*993b0882SAndroid Build Coastguard Worker bool UniLibBase::RegexMatcher::Find(int* status) {
320*993b0882SAndroid Build Coastguard Worker   if (jni_cache_) {
321*993b0882SAndroid Build Coastguard Worker     const bool result = jni_cache_->GetEnv()->CallBooleanMethod(
322*993b0882SAndroid Build Coastguard Worker         matcher_.get(), jni_cache_->matcher_find);
323*993b0882SAndroid Build Coastguard Worker     if (jni_cache_->ExceptionCheckAndClear()) {
324*993b0882SAndroid Build Coastguard Worker       *status = kError;
325*993b0882SAndroid Build Coastguard Worker       return false;
326*993b0882SAndroid Build Coastguard Worker     }
327*993b0882SAndroid Build Coastguard Worker 
328*993b0882SAndroid Build Coastguard Worker     last_find_offset_dirty_ = true;
329*993b0882SAndroid Build Coastguard Worker     *status = kNoError;
330*993b0882SAndroid Build Coastguard Worker     return result;
331*993b0882SAndroid Build Coastguard Worker   } else {
332*993b0882SAndroid Build Coastguard Worker     *status = kError;
333*993b0882SAndroid Build Coastguard Worker     return false;
334*993b0882SAndroid Build Coastguard Worker   }
335*993b0882SAndroid Build Coastguard Worker }
336*993b0882SAndroid Build Coastguard Worker 
Start(int * status) const337*993b0882SAndroid Build Coastguard Worker int UniLibBase::RegexMatcher::Start(int* status) const {
338*993b0882SAndroid Build Coastguard Worker   return Start(/*group_idx=*/0, status);
339*993b0882SAndroid Build Coastguard Worker }
340*993b0882SAndroid Build Coastguard Worker 
Start(int group_idx,int * status) const341*993b0882SAndroid Build Coastguard Worker int UniLibBase::RegexMatcher::Start(int group_idx, int* status) const {
342*993b0882SAndroid Build Coastguard Worker   if (jni_cache_) {
343*993b0882SAndroid Build Coastguard Worker     *status = kNoError;
344*993b0882SAndroid Build Coastguard Worker 
345*993b0882SAndroid Build Coastguard Worker     if (!UpdateLastFindOffset()) {
346*993b0882SAndroid Build Coastguard Worker       *status = kError;
347*993b0882SAndroid Build Coastguard Worker       return kError;
348*993b0882SAndroid Build Coastguard Worker     }
349*993b0882SAndroid Build Coastguard Worker 
350*993b0882SAndroid Build Coastguard Worker     const int java_index = jni_cache_->GetEnv()->CallIntMethod(
351*993b0882SAndroid Build Coastguard Worker         matcher_.get(), jni_cache_->matcher_start_idx, group_idx);
352*993b0882SAndroid Build Coastguard Worker     if (jni_cache_->ExceptionCheckAndClear()) {
353*993b0882SAndroid Build Coastguard Worker       *status = kError;
354*993b0882SAndroid Build Coastguard Worker       return kError;
355*993b0882SAndroid Build Coastguard Worker     }
356*993b0882SAndroid Build Coastguard Worker 
357*993b0882SAndroid Build Coastguard Worker     // If the group didn't participate in the match the index is -1.
358*993b0882SAndroid Build Coastguard Worker     if (java_index == -1) {
359*993b0882SAndroid Build Coastguard Worker       return -1;
360*993b0882SAndroid Build Coastguard Worker     }
361*993b0882SAndroid Build Coastguard Worker 
362*993b0882SAndroid Build Coastguard Worker     const int unicode_index = jni_cache_->GetEnv()->CallIntMethod(
363*993b0882SAndroid Build Coastguard Worker         text_.get(), jni_cache_->string_code_point_count, last_find_offset_,
364*993b0882SAndroid Build Coastguard Worker         java_index);
365*993b0882SAndroid Build Coastguard Worker     if (jni_cache_->ExceptionCheckAndClear()) {
366*993b0882SAndroid Build Coastguard Worker       *status = kError;
367*993b0882SAndroid Build Coastguard Worker       return kError;
368*993b0882SAndroid Build Coastguard Worker     }
369*993b0882SAndroid Build Coastguard Worker 
370*993b0882SAndroid Build Coastguard Worker     return unicode_index + last_find_offset_codepoints_;
371*993b0882SAndroid Build Coastguard Worker   } else {
372*993b0882SAndroid Build Coastguard Worker     *status = kError;
373*993b0882SAndroid Build Coastguard Worker     return kError;
374*993b0882SAndroid Build Coastguard Worker   }
375*993b0882SAndroid Build Coastguard Worker }
376*993b0882SAndroid Build Coastguard Worker 
End(int * status) const377*993b0882SAndroid Build Coastguard Worker int UniLibBase::RegexMatcher::End(int* status) const {
378*993b0882SAndroid Build Coastguard Worker   return End(/*group_idx=*/0, status);
379*993b0882SAndroid Build Coastguard Worker }
380*993b0882SAndroid Build Coastguard Worker 
End(int group_idx,int * status) const381*993b0882SAndroid Build Coastguard Worker int UniLibBase::RegexMatcher::End(int group_idx, int* status) const {
382*993b0882SAndroid Build Coastguard Worker   if (jni_cache_) {
383*993b0882SAndroid Build Coastguard Worker     *status = kNoError;
384*993b0882SAndroid Build Coastguard Worker 
385*993b0882SAndroid Build Coastguard Worker     if (!UpdateLastFindOffset()) {
386*993b0882SAndroid Build Coastguard Worker       *status = kError;
387*993b0882SAndroid Build Coastguard Worker       return kError;
388*993b0882SAndroid Build Coastguard Worker     }
389*993b0882SAndroid Build Coastguard Worker 
390*993b0882SAndroid Build Coastguard Worker     const int java_index = jni_cache_->GetEnv()->CallIntMethod(
391*993b0882SAndroid Build Coastguard Worker         matcher_.get(), jni_cache_->matcher_end_idx, group_idx);
392*993b0882SAndroid Build Coastguard Worker     if (jni_cache_->ExceptionCheckAndClear()) {
393*993b0882SAndroid Build Coastguard Worker       *status = kError;
394*993b0882SAndroid Build Coastguard Worker       return kError;
395*993b0882SAndroid Build Coastguard Worker     }
396*993b0882SAndroid Build Coastguard Worker 
397*993b0882SAndroid Build Coastguard Worker     // If the group didn't participate in the match the index is -1.
398*993b0882SAndroid Build Coastguard Worker     if (java_index == -1) {
399*993b0882SAndroid Build Coastguard Worker       return -1;
400*993b0882SAndroid Build Coastguard Worker     }
401*993b0882SAndroid Build Coastguard Worker 
402*993b0882SAndroid Build Coastguard Worker     const int unicode_index = jni_cache_->GetEnv()->CallIntMethod(
403*993b0882SAndroid Build Coastguard Worker         text_.get(), jni_cache_->string_code_point_count, last_find_offset_,
404*993b0882SAndroid Build Coastguard Worker         java_index);
405*993b0882SAndroid Build Coastguard Worker     if (jni_cache_->ExceptionCheckAndClear()) {
406*993b0882SAndroid Build Coastguard Worker       *status = kError;
407*993b0882SAndroid Build Coastguard Worker       return kError;
408*993b0882SAndroid Build Coastguard Worker     }
409*993b0882SAndroid Build Coastguard Worker 
410*993b0882SAndroid Build Coastguard Worker     return unicode_index + last_find_offset_codepoints_;
411*993b0882SAndroid Build Coastguard Worker   } else {
412*993b0882SAndroid Build Coastguard Worker     *status = kError;
413*993b0882SAndroid Build Coastguard Worker     return kError;
414*993b0882SAndroid Build Coastguard Worker   }
415*993b0882SAndroid Build Coastguard Worker }
416*993b0882SAndroid Build Coastguard Worker 
Group(int * status) const417*993b0882SAndroid Build Coastguard Worker UnicodeText UniLibBase::RegexMatcher::Group(int* status) const {
418*993b0882SAndroid Build Coastguard Worker   if (jni_cache_) {
419*993b0882SAndroid Build Coastguard Worker     JNIEnv* jenv = jni_cache_->GetEnv();
420*993b0882SAndroid Build Coastguard Worker     StatusOr<ScopedLocalRef<jstring>> status_or_java_result =
421*993b0882SAndroid Build Coastguard Worker         JniHelper::CallObjectMethod<jstring>(jenv, matcher_.get(),
422*993b0882SAndroid Build Coastguard Worker                                              jni_cache_->matcher_group);
423*993b0882SAndroid Build Coastguard Worker 
424*993b0882SAndroid Build Coastguard Worker     if (jni_cache_->ExceptionCheckAndClear() || !status_or_java_result.ok() ||
425*993b0882SAndroid Build Coastguard Worker         !status_or_java_result.ValueOrDie()) {
426*993b0882SAndroid Build Coastguard Worker       *status = kError;
427*993b0882SAndroid Build Coastguard Worker       return UTF8ToUnicodeText("", /*do_copy=*/false);
428*993b0882SAndroid Build Coastguard Worker     }
429*993b0882SAndroid Build Coastguard Worker 
430*993b0882SAndroid Build Coastguard Worker     StatusOr<std::string> status_or_result =
431*993b0882SAndroid Build Coastguard Worker         JStringToUtf8String(jenv, status_or_java_result.ValueOrDie().get());
432*993b0882SAndroid Build Coastguard Worker     if (!status_or_result.ok()) {
433*993b0882SAndroid Build Coastguard Worker       *status = kError;
434*993b0882SAndroid Build Coastguard Worker       return UTF8ToUnicodeText("", /*do_copy=*/false);
435*993b0882SAndroid Build Coastguard Worker     }
436*993b0882SAndroid Build Coastguard Worker     *status = kNoError;
437*993b0882SAndroid Build Coastguard Worker     return UTF8ToUnicodeText(status_or_result.ValueOrDie(), /*do_copy=*/true);
438*993b0882SAndroid Build Coastguard Worker   } else {
439*993b0882SAndroid Build Coastguard Worker     *status = kError;
440*993b0882SAndroid Build Coastguard Worker     return UTF8ToUnicodeText("", /*do_copy=*/false);
441*993b0882SAndroid Build Coastguard Worker   }
442*993b0882SAndroid Build Coastguard Worker }
443*993b0882SAndroid Build Coastguard Worker 
Group(int group_idx,int * status) const444*993b0882SAndroid Build Coastguard Worker UnicodeText UniLibBase::RegexMatcher::Group(int group_idx, int* status) const {
445*993b0882SAndroid Build Coastguard Worker   if (jni_cache_) {
446*993b0882SAndroid Build Coastguard Worker     JNIEnv* jenv = jni_cache_->GetEnv();
447*993b0882SAndroid Build Coastguard Worker 
448*993b0882SAndroid Build Coastguard Worker     StatusOr<ScopedLocalRef<jstring>> status_or_java_result =
449*993b0882SAndroid Build Coastguard Worker         JniHelper::CallObjectMethod<jstring>(
450*993b0882SAndroid Build Coastguard Worker             jenv, matcher_.get(), jni_cache_->matcher_group_idx, group_idx);
451*993b0882SAndroid Build Coastguard Worker     if (jni_cache_->ExceptionCheckAndClear() || !status_or_java_result.ok()) {
452*993b0882SAndroid Build Coastguard Worker       *status = kError;
453*993b0882SAndroid Build Coastguard Worker       TC3_LOG(ERROR) << "Exception occurred";
454*993b0882SAndroid Build Coastguard Worker       return UTF8ToUnicodeText("", /*do_copy=*/false);
455*993b0882SAndroid Build Coastguard Worker     }
456*993b0882SAndroid Build Coastguard Worker 
457*993b0882SAndroid Build Coastguard Worker     // java_result is nullptr when the group did not participate in the match.
458*993b0882SAndroid Build Coastguard Worker     // For these cases other UniLib implementations return empty string, and
459*993b0882SAndroid Build Coastguard Worker     // the participation can be checked by checking if Start() == -1.
460*993b0882SAndroid Build Coastguard Worker     if (!status_or_java_result.ValueOrDie()) {
461*993b0882SAndroid Build Coastguard Worker       *status = kNoError;
462*993b0882SAndroid Build Coastguard Worker       return UTF8ToUnicodeText("", /*do_copy=*/false);
463*993b0882SAndroid Build Coastguard Worker     }
464*993b0882SAndroid Build Coastguard Worker 
465*993b0882SAndroid Build Coastguard Worker     StatusOr<std::string> status_or_result =
466*993b0882SAndroid Build Coastguard Worker         JStringToUtf8String(jenv, status_or_java_result.ValueOrDie().get());
467*993b0882SAndroid Build Coastguard Worker     if (!status_or_result.ok()) {
468*993b0882SAndroid Build Coastguard Worker       *status = kError;
469*993b0882SAndroid Build Coastguard Worker       return UTF8ToUnicodeText("", /*do_copy=*/false);
470*993b0882SAndroid Build Coastguard Worker     }
471*993b0882SAndroid Build Coastguard Worker     *status = kNoError;
472*993b0882SAndroid Build Coastguard Worker     return UTF8ToUnicodeText(status_or_result.ValueOrDie(), /*do_copy=*/true);
473*993b0882SAndroid Build Coastguard Worker   } else {
474*993b0882SAndroid Build Coastguard Worker     *status = kError;
475*993b0882SAndroid Build Coastguard Worker     return UTF8ToUnicodeText("", /*do_copy=*/false);
476*993b0882SAndroid Build Coastguard Worker   }
477*993b0882SAndroid Build Coastguard Worker }
478*993b0882SAndroid Build Coastguard Worker 
// Out-of-class definition of the static constexpr member kDone. Its value
// comes from the in-class declaration (not visible in this file). Before
// C++17 such a definition is required whenever the constant is odr-used.
constexpr int UniLibBase::BreakIterator::kDone;
480*993b0882SAndroid Build Coastguard Worker 
BreakIterator(const JniCache * jni_cache,const UnicodeText & text)481*993b0882SAndroid Build Coastguard Worker UniLibBase::BreakIterator::BreakIterator(const JniCache* jni_cache,
482*993b0882SAndroid Build Coastguard Worker                                          const UnicodeText& text)
483*993b0882SAndroid Build Coastguard Worker     : jni_cache_(jni_cache),
484*993b0882SAndroid Build Coastguard Worker       text_(nullptr, jni_cache ? jni_cache->jvm : nullptr),
485*993b0882SAndroid Build Coastguard Worker       iterator_(nullptr, jni_cache ? jni_cache->jvm : nullptr),
486*993b0882SAndroid Build Coastguard Worker       last_break_index_(0),
487*993b0882SAndroid Build Coastguard Worker       last_unicode_index_(0) {
488*993b0882SAndroid Build Coastguard Worker   if (jni_cache_) {
489*993b0882SAndroid Build Coastguard Worker     JNIEnv* jenv = jni_cache_->GetEnv();
490*993b0882SAndroid Build Coastguard Worker     StatusOr<ScopedLocalRef<jstring>> status_or_text =
491*993b0882SAndroid Build Coastguard Worker         jni_cache_->ConvertToJavaString(text);
492*993b0882SAndroid Build Coastguard Worker     if (!status_or_text.ok()) {
493*993b0882SAndroid Build Coastguard Worker       return;
494*993b0882SAndroid Build Coastguard Worker     }
495*993b0882SAndroid Build Coastguard Worker     text_ =
496*993b0882SAndroid Build Coastguard Worker         MakeGlobalRef(status_or_text.ValueOrDie().get(), jenv, jni_cache->jvm);
497*993b0882SAndroid Build Coastguard Worker     if (!text_) {
498*993b0882SAndroid Build Coastguard Worker       return;
499*993b0882SAndroid Build Coastguard Worker     }
500*993b0882SAndroid Build Coastguard Worker 
501*993b0882SAndroid Build Coastguard Worker     StatusOr<ScopedLocalRef<jobject>> status_or_iterator =
502*993b0882SAndroid Build Coastguard Worker         JniHelper::CallStaticObjectMethod(
503*993b0882SAndroid Build Coastguard Worker             jenv, jni_cache->breakiterator_class.get(),
504*993b0882SAndroid Build Coastguard Worker             jni_cache->breakiterator_getwordinstance,
505*993b0882SAndroid Build Coastguard Worker             jni_cache->locale_us.get());
506*993b0882SAndroid Build Coastguard Worker     if (!status_or_iterator.ok()) {
507*993b0882SAndroid Build Coastguard Worker       return;
508*993b0882SAndroid Build Coastguard Worker     }
509*993b0882SAndroid Build Coastguard Worker     iterator_ = MakeGlobalRef(status_or_iterator.ValueOrDie().get(), jenv,
510*993b0882SAndroid Build Coastguard Worker                               jni_cache->jvm);
511*993b0882SAndroid Build Coastguard Worker     if (!iterator_) {
512*993b0882SAndroid Build Coastguard Worker       return;
513*993b0882SAndroid Build Coastguard Worker     }
514*993b0882SAndroid Build Coastguard Worker     JniHelper::CallVoidMethod(jenv, iterator_.get(),
515*993b0882SAndroid Build Coastguard Worker                               jni_cache->breakiterator_settext, text_.get());
516*993b0882SAndroid Build Coastguard Worker   }
517*993b0882SAndroid Build Coastguard Worker }
518*993b0882SAndroid Build Coastguard Worker 
Next()519*993b0882SAndroid Build Coastguard Worker int UniLibBase::BreakIterator::Next() {
520*993b0882SAndroid Build Coastguard Worker   if (jni_cache_) {
521*993b0882SAndroid Build Coastguard Worker     const int break_index = jni_cache_->GetEnv()->CallIntMethod(
522*993b0882SAndroid Build Coastguard Worker         iterator_.get(), jni_cache_->breakiterator_next);
523*993b0882SAndroid Build Coastguard Worker     if (jni_cache_->ExceptionCheckAndClear() ||
524*993b0882SAndroid Build Coastguard Worker         break_index == BreakIterator::kDone) {
525*993b0882SAndroid Build Coastguard Worker       return BreakIterator::kDone;
526*993b0882SAndroid Build Coastguard Worker     }
527*993b0882SAndroid Build Coastguard Worker 
528*993b0882SAndroid Build Coastguard Worker     const int token_unicode_length = jni_cache_->GetEnv()->CallIntMethod(
529*993b0882SAndroid Build Coastguard Worker         text_.get(), jni_cache_->string_code_point_count, last_break_index_,
530*993b0882SAndroid Build Coastguard Worker         break_index);
531*993b0882SAndroid Build Coastguard Worker     if (jni_cache_->ExceptionCheckAndClear()) {
532*993b0882SAndroid Build Coastguard Worker       return BreakIterator::kDone;
533*993b0882SAndroid Build Coastguard Worker     }
534*993b0882SAndroid Build Coastguard Worker 
535*993b0882SAndroid Build Coastguard Worker     last_break_index_ = break_index;
536*993b0882SAndroid Build Coastguard Worker     return last_unicode_index_ += token_unicode_length;
537*993b0882SAndroid Build Coastguard Worker   }
538*993b0882SAndroid Build Coastguard Worker   return BreakIterator::kDone;
539*993b0882SAndroid Build Coastguard Worker }
540*993b0882SAndroid Build Coastguard Worker 
CreateBreakIterator(const UnicodeText & text) const541*993b0882SAndroid Build Coastguard Worker std::unique_ptr<UniLibBase::BreakIterator> UniLibBase::CreateBreakIterator(
542*993b0882SAndroid Build Coastguard Worker     const UnicodeText& text) const {
543*993b0882SAndroid Build Coastguard Worker   return std::unique_ptr<UniLibBase::BreakIterator>(
544*993b0882SAndroid Build Coastguard Worker       new UniLibBase::BreakIterator(jni_cache_.get(), text));
545*993b0882SAndroid Build Coastguard Worker }
546*993b0882SAndroid Build Coastguard Worker 
547*993b0882SAndroid Build Coastguard Worker }  // namespace libtextclassifier3
548