/*
 * Copyright (C) 2018 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16*993b0882SAndroid Build Coastguard Worker
17*993b0882SAndroid Build Coastguard Worker #include "utils/utf8/unilib-javaicu.h"
18*993b0882SAndroid Build Coastguard Worker
19*993b0882SAndroid Build Coastguard Worker #include <math.h>
20*993b0882SAndroid Build Coastguard Worker
21*993b0882SAndroid Build Coastguard Worker #include <cassert>
22*993b0882SAndroid Build Coastguard Worker #include <cctype>
23*993b0882SAndroid Build Coastguard Worker #include <map>
24*993b0882SAndroid Build Coastguard Worker
25*993b0882SAndroid Build Coastguard Worker #include "utils/base/logging.h"
26*993b0882SAndroid Build Coastguard Worker #include "utils/base/statusor.h"
27*993b0882SAndroid Build Coastguard Worker #include "utils/java/jni-base.h"
28*993b0882SAndroid Build Coastguard Worker #include "utils/java/jni-helper.h"
29*993b0882SAndroid Build Coastguard Worker #include "utils/utf8/unicodetext.h"
30*993b0882SAndroid Build Coastguard Worker
31*993b0882SAndroid Build Coastguard Worker namespace libtextclassifier3 {
32*993b0882SAndroid Build Coastguard Worker
UniLibBase()33*993b0882SAndroid Build Coastguard Worker UniLibBase::UniLibBase() {
34*993b0882SAndroid Build Coastguard Worker TC3_LOG(FATAL) << "Java ICU UniLib must be initialized with a JniCache.";
35*993b0882SAndroid Build Coastguard Worker }
36*993b0882SAndroid Build Coastguard Worker
UniLibBase(const std::shared_ptr<JniCache> & jni_cache)37*993b0882SAndroid Build Coastguard Worker UniLibBase::UniLibBase(const std::shared_ptr<JniCache>& jni_cache)
38*993b0882SAndroid Build Coastguard Worker : jni_cache_(jni_cache) {}
39*993b0882SAndroid Build Coastguard Worker
IsOpeningBracket(char32 codepoint) const40*993b0882SAndroid Build Coastguard Worker bool UniLibBase::IsOpeningBracket(char32 codepoint) const {
41*993b0882SAndroid Build Coastguard Worker return libtextclassifier3::IsOpeningBracket(codepoint);
42*993b0882SAndroid Build Coastguard Worker }
43*993b0882SAndroid Build Coastguard Worker
IsClosingBracket(char32 codepoint) const44*993b0882SAndroid Build Coastguard Worker bool UniLibBase::IsClosingBracket(char32 codepoint) const {
45*993b0882SAndroid Build Coastguard Worker return libtextclassifier3::IsClosingBracket(codepoint);
46*993b0882SAndroid Build Coastguard Worker }
47*993b0882SAndroid Build Coastguard Worker
IsWhitespace(char32 codepoint) const48*993b0882SAndroid Build Coastguard Worker bool UniLibBase::IsWhitespace(char32 codepoint) const {
49*993b0882SAndroid Build Coastguard Worker return libtextclassifier3::IsWhitespace(codepoint);
50*993b0882SAndroid Build Coastguard Worker }
51*993b0882SAndroid Build Coastguard Worker
IsDigit(char32 codepoint) const52*993b0882SAndroid Build Coastguard Worker bool UniLibBase::IsDigit(char32 codepoint) const {
53*993b0882SAndroid Build Coastguard Worker return libtextclassifier3::IsDigit(codepoint);
54*993b0882SAndroid Build Coastguard Worker }
55*993b0882SAndroid Build Coastguard Worker
IsLower(char32 codepoint) const56*993b0882SAndroid Build Coastguard Worker bool UniLibBase::IsLower(char32 codepoint) const {
57*993b0882SAndroid Build Coastguard Worker return libtextclassifier3::IsLower(codepoint);
58*993b0882SAndroid Build Coastguard Worker }
59*993b0882SAndroid Build Coastguard Worker
IsUpper(char32 codepoint) const60*993b0882SAndroid Build Coastguard Worker bool UniLibBase::IsUpper(char32 codepoint) const {
61*993b0882SAndroid Build Coastguard Worker return libtextclassifier3::IsUpper(codepoint);
62*993b0882SAndroid Build Coastguard Worker }
63*993b0882SAndroid Build Coastguard Worker
IsPunctuation(char32 codepoint) const64*993b0882SAndroid Build Coastguard Worker bool UniLibBase::IsPunctuation(char32 codepoint) const {
65*993b0882SAndroid Build Coastguard Worker return libtextclassifier3::IsPunctuation(codepoint);
66*993b0882SAndroid Build Coastguard Worker }
67*993b0882SAndroid Build Coastguard Worker
ToLower(char32 codepoint) const68*993b0882SAndroid Build Coastguard Worker char32 UniLibBase::ToLower(char32 codepoint) const {
69*993b0882SAndroid Build Coastguard Worker return libtextclassifier3::ToLower(codepoint);
70*993b0882SAndroid Build Coastguard Worker }
71*993b0882SAndroid Build Coastguard Worker
ToUpper(char32 codepoint) const72*993b0882SAndroid Build Coastguard Worker char32 UniLibBase::ToUpper(char32 codepoint) const {
73*993b0882SAndroid Build Coastguard Worker return libtextclassifier3::ToUpper(codepoint);
74*993b0882SAndroid Build Coastguard Worker }
75*993b0882SAndroid Build Coastguard Worker
GetPairedBracket(char32 codepoint) const76*993b0882SAndroid Build Coastguard Worker char32 UniLibBase::GetPairedBracket(char32 codepoint) const {
77*993b0882SAndroid Build Coastguard Worker return libtextclassifier3::GetPairedBracket(codepoint);
78*993b0882SAndroid Build Coastguard Worker }
79*993b0882SAndroid Build Coastguard Worker
// -----------------------------------------------------------------------------
// Implementations that call out to JVM. Behold the beauty.
// -----------------------------------------------------------------------------
83*993b0882SAndroid Build Coastguard Worker
Length(const UnicodeText & text) const84*993b0882SAndroid Build Coastguard Worker StatusOr<int32> UniLibBase::Length(const UnicodeText& text) const {
85*993b0882SAndroid Build Coastguard Worker TC3_ASSIGN_OR_RETURN(ScopedLocalRef<jstring> text_java,
86*993b0882SAndroid Build Coastguard Worker jni_cache_->ConvertToJavaString(text));
87*993b0882SAndroid Build Coastguard Worker
88*993b0882SAndroid Build Coastguard Worker JNIEnv* jenv = jni_cache_->GetEnv();
89*993b0882SAndroid Build Coastguard Worker TC3_ASSIGN_OR_RETURN(int utf16_length,
90*993b0882SAndroid Build Coastguard Worker JniHelper::CallIntMethod(jenv, text_java.get(),
91*993b0882SAndroid Build Coastguard Worker jni_cache_->string_length));
92*993b0882SAndroid Build Coastguard Worker
93*993b0882SAndroid Build Coastguard Worker return JniHelper::CallIntMethod(jenv, text_java.get(),
94*993b0882SAndroid Build Coastguard Worker jni_cache_->string_code_point_count, 0,
95*993b0882SAndroid Build Coastguard Worker utf16_length);
96*993b0882SAndroid Build Coastguard Worker }
97*993b0882SAndroid Build Coastguard Worker
ParseInt32(const UnicodeText & text,int32 * result) const98*993b0882SAndroid Build Coastguard Worker bool UniLibBase::ParseInt32(const UnicodeText& text, int32* result) const {
99*993b0882SAndroid Build Coastguard Worker return ParseInt(text, result);
100*993b0882SAndroid Build Coastguard Worker }
101*993b0882SAndroid Build Coastguard Worker
ParseInt64(const UnicodeText & text,int64 * result) const102*993b0882SAndroid Build Coastguard Worker bool UniLibBase::ParseInt64(const UnicodeText& text, int64* result) const {
103*993b0882SAndroid Build Coastguard Worker return ParseInt(text, result);
104*993b0882SAndroid Build Coastguard Worker }
105*993b0882SAndroid Build Coastguard Worker
ParseDouble(const UnicodeText & text,double * result) const106*993b0882SAndroid Build Coastguard Worker bool UniLibBase::ParseDouble(const UnicodeText& text, double* result) const {
107*993b0882SAndroid Build Coastguard Worker if (!jni_cache_) {
108*993b0882SAndroid Build Coastguard Worker return false;
109*993b0882SAndroid Build Coastguard Worker }
110*993b0882SAndroid Build Coastguard Worker
111*993b0882SAndroid Build Coastguard Worker auto it_dot = text.begin();
112*993b0882SAndroid Build Coastguard Worker for (; it_dot != text.end() && !IsDot(*it_dot); it_dot++) {
113*993b0882SAndroid Build Coastguard Worker }
114*993b0882SAndroid Build Coastguard Worker
115*993b0882SAndroid Build Coastguard Worker int32 integer_part;
116*993b0882SAndroid Build Coastguard Worker if (!ParseInt(UnicodeText::Substring(text.begin(), it_dot, /*do_copy=*/false),
117*993b0882SAndroid Build Coastguard Worker &integer_part)) {
118*993b0882SAndroid Build Coastguard Worker return false;
119*993b0882SAndroid Build Coastguard Worker }
120*993b0882SAndroid Build Coastguard Worker
121*993b0882SAndroid Build Coastguard Worker int32 fractional_part = 0;
122*993b0882SAndroid Build Coastguard Worker if (it_dot != text.end()) {
123*993b0882SAndroid Build Coastguard Worker if (!ParseInt(
124*993b0882SAndroid Build Coastguard Worker UnicodeText::Substring(++it_dot, text.end(), /*do_copy=*/false),
125*993b0882SAndroid Build Coastguard Worker &fractional_part)) {
126*993b0882SAndroid Build Coastguard Worker return false;
127*993b0882SAndroid Build Coastguard Worker }
128*993b0882SAndroid Build Coastguard Worker }
129*993b0882SAndroid Build Coastguard Worker
130*993b0882SAndroid Build Coastguard Worker double factional_part_double = fractional_part;
131*993b0882SAndroid Build Coastguard Worker while (factional_part_double >= 1) {
132*993b0882SAndroid Build Coastguard Worker factional_part_double /= 10;
133*993b0882SAndroid Build Coastguard Worker }
134*993b0882SAndroid Build Coastguard Worker *result = integer_part + factional_part_double;
135*993b0882SAndroid Build Coastguard Worker
136*993b0882SAndroid Build Coastguard Worker return true;
137*993b0882SAndroid Build Coastguard Worker }
138*993b0882SAndroid Build Coastguard Worker
CreateRegexPattern(const UnicodeText & regex) const139*993b0882SAndroid Build Coastguard Worker std::unique_ptr<UniLibBase::RegexPattern> UniLibBase::CreateRegexPattern(
140*993b0882SAndroid Build Coastguard Worker const UnicodeText& regex) const {
141*993b0882SAndroid Build Coastguard Worker return std::unique_ptr<UniLibBase::RegexPattern>(
142*993b0882SAndroid Build Coastguard Worker new UniLibBase::RegexPattern(jni_cache_.get(), regex, /*lazy=*/false));
143*993b0882SAndroid Build Coastguard Worker }
144*993b0882SAndroid Build Coastguard Worker
CreateLazyRegexPattern(const UnicodeText & regex) const145*993b0882SAndroid Build Coastguard Worker std::unique_ptr<UniLibBase::RegexPattern> UniLibBase::CreateLazyRegexPattern(
146*993b0882SAndroid Build Coastguard Worker const UnicodeText& regex) const {
147*993b0882SAndroid Build Coastguard Worker return std::unique_ptr<UniLibBase::RegexPattern>(
148*993b0882SAndroid Build Coastguard Worker new UniLibBase::RegexPattern(jni_cache_.get(), regex, /*lazy=*/true));
149*993b0882SAndroid Build Coastguard Worker }
150*993b0882SAndroid Build Coastguard Worker
RegexPattern(const JniCache * jni_cache,const UnicodeText & pattern,bool lazy)151*993b0882SAndroid Build Coastguard Worker UniLibBase::RegexPattern::RegexPattern(const JniCache* jni_cache,
152*993b0882SAndroid Build Coastguard Worker const UnicodeText& pattern, bool lazy)
153*993b0882SAndroid Build Coastguard Worker : jni_cache_(jni_cache),
154*993b0882SAndroid Build Coastguard Worker pattern_(nullptr, jni_cache ? jni_cache->jvm : nullptr),
155*993b0882SAndroid Build Coastguard Worker initialized_(false),
156*993b0882SAndroid Build Coastguard Worker initialization_failure_(false),
157*993b0882SAndroid Build Coastguard Worker pattern_text_(pattern) {
158*993b0882SAndroid Build Coastguard Worker if (!lazy) {
159*993b0882SAndroid Build Coastguard Worker LockedInitializeIfNotAlready();
160*993b0882SAndroid Build Coastguard Worker }
161*993b0882SAndroid Build Coastguard Worker }
162*993b0882SAndroid Build Coastguard Worker
LockedInitializeIfNotAlready() const163*993b0882SAndroid Build Coastguard Worker Status UniLibBase::RegexPattern::LockedInitializeIfNotAlready() const {
164*993b0882SAndroid Build Coastguard Worker std::lock_guard<std::mutex> guard(mutex_);
165*993b0882SAndroid Build Coastguard Worker if (initialized_ || initialization_failure_) {
166*993b0882SAndroid Build Coastguard Worker return Status::OK;
167*993b0882SAndroid Build Coastguard Worker }
168*993b0882SAndroid Build Coastguard Worker
169*993b0882SAndroid Build Coastguard Worker if (jni_cache_) {
170*993b0882SAndroid Build Coastguard Worker JNIEnv* jenv = jni_cache_->GetEnv();
171*993b0882SAndroid Build Coastguard Worker initialization_failure_ = true;
172*993b0882SAndroid Build Coastguard Worker TC3_ASSIGN_OR_RETURN(ScopedLocalRef<jstring> regex_java,
173*993b0882SAndroid Build Coastguard Worker jni_cache_->ConvertToJavaString(pattern_text_));
174*993b0882SAndroid Build Coastguard Worker TC3_ASSIGN_OR_RETURN(ScopedLocalRef<jobject> pattern,
175*993b0882SAndroid Build Coastguard Worker JniHelper::CallStaticObjectMethod(
176*993b0882SAndroid Build Coastguard Worker jenv, jni_cache_->pattern_class.get(),
177*993b0882SAndroid Build Coastguard Worker jni_cache_->pattern_compile, regex_java.get()));
178*993b0882SAndroid Build Coastguard Worker pattern_ = MakeGlobalRef(pattern.get(), jenv, jni_cache_->jvm);
179*993b0882SAndroid Build Coastguard Worker if (pattern_ == nullptr) {
180*993b0882SAndroid Build Coastguard Worker return Status::UNKNOWN;
181*993b0882SAndroid Build Coastguard Worker }
182*993b0882SAndroid Build Coastguard Worker
183*993b0882SAndroid Build Coastguard Worker initialization_failure_ = false;
184*993b0882SAndroid Build Coastguard Worker initialized_ = true;
185*993b0882SAndroid Build Coastguard Worker pattern_text_.clear(); // We don't need this anymore.
186*993b0882SAndroid Build Coastguard Worker }
187*993b0882SAndroid Build Coastguard Worker return Status::OK;
188*993b0882SAndroid Build Coastguard Worker }
189*993b0882SAndroid Build Coastguard Worker
190*993b0882SAndroid Build Coastguard Worker constexpr int UniLibBase::RegexMatcher::kError;
191*993b0882SAndroid Build Coastguard Worker constexpr int UniLibBase::RegexMatcher::kNoError;
192*993b0882SAndroid Build Coastguard Worker
Matcher(const UnicodeText & context) const193*993b0882SAndroid Build Coastguard Worker std::unique_ptr<UniLibBase::RegexMatcher> UniLibBase::RegexPattern::Matcher(
194*993b0882SAndroid Build Coastguard Worker const UnicodeText& context) const {
195*993b0882SAndroid Build Coastguard Worker LockedInitializeIfNotAlready(); // Possibly lazy initialization.
196*993b0882SAndroid Build Coastguard Worker if (initialization_failure_) {
197*993b0882SAndroid Build Coastguard Worker return nullptr;
198*993b0882SAndroid Build Coastguard Worker }
199*993b0882SAndroid Build Coastguard Worker
200*993b0882SAndroid Build Coastguard Worker if (jni_cache_) {
201*993b0882SAndroid Build Coastguard Worker JNIEnv* env = jni_cache_->GetEnv();
202*993b0882SAndroid Build Coastguard Worker const StatusOr<ScopedLocalRef<jstring>> status_or_context_java =
203*993b0882SAndroid Build Coastguard Worker jni_cache_->ConvertToJavaString(context);
204*993b0882SAndroid Build Coastguard Worker if (!status_or_context_java.ok() || !status_or_context_java.ValueOrDie()) {
205*993b0882SAndroid Build Coastguard Worker return nullptr;
206*993b0882SAndroid Build Coastguard Worker }
207*993b0882SAndroid Build Coastguard Worker const StatusOr<ScopedLocalRef<jobject>> status_or_matcher =
208*993b0882SAndroid Build Coastguard Worker JniHelper::CallObjectMethod(env, pattern_.get(),
209*993b0882SAndroid Build Coastguard Worker jni_cache_->pattern_matcher,
210*993b0882SAndroid Build Coastguard Worker status_or_context_java.ValueOrDie().get());
211*993b0882SAndroid Build Coastguard Worker if (jni_cache_->ExceptionCheckAndClear() || !status_or_matcher.ok() ||
212*993b0882SAndroid Build Coastguard Worker !status_or_matcher.ValueOrDie()) {
213*993b0882SAndroid Build Coastguard Worker return nullptr;
214*993b0882SAndroid Build Coastguard Worker }
215*993b0882SAndroid Build Coastguard Worker return std::unique_ptr<UniLibBase::RegexMatcher>(new RegexMatcher(
216*993b0882SAndroid Build Coastguard Worker jni_cache_,
217*993b0882SAndroid Build Coastguard Worker MakeGlobalRef(status_or_matcher.ValueOrDie().get(), env,
218*993b0882SAndroid Build Coastguard Worker jni_cache_->jvm),
219*993b0882SAndroid Build Coastguard Worker MakeGlobalRef(status_or_context_java.ValueOrDie().get(), env,
220*993b0882SAndroid Build Coastguard Worker jni_cache_->jvm)));
221*993b0882SAndroid Build Coastguard Worker } else {
222*993b0882SAndroid Build Coastguard Worker // NOTE: A valid object needs to be created here to pass the interface
223*993b0882SAndroid Build Coastguard Worker // tests.
224*993b0882SAndroid Build Coastguard Worker return std::unique_ptr<UniLibBase::RegexMatcher>(
225*993b0882SAndroid Build Coastguard Worker new RegexMatcher(jni_cache_, {}, {}));
226*993b0882SAndroid Build Coastguard Worker }
227*993b0882SAndroid Build Coastguard Worker }
228*993b0882SAndroid Build Coastguard Worker
RegexMatcher(const JniCache * jni_cache,ScopedGlobalRef<jobject> matcher,ScopedGlobalRef<jstring> text)229*993b0882SAndroid Build Coastguard Worker UniLibBase::RegexMatcher::RegexMatcher(const JniCache* jni_cache,
230*993b0882SAndroid Build Coastguard Worker ScopedGlobalRef<jobject> matcher,
231*993b0882SAndroid Build Coastguard Worker ScopedGlobalRef<jstring> text)
232*993b0882SAndroid Build Coastguard Worker : jni_cache_(jni_cache),
233*993b0882SAndroid Build Coastguard Worker matcher_(std::move(matcher)),
234*993b0882SAndroid Build Coastguard Worker text_(std::move(text)) {}
235*993b0882SAndroid Build Coastguard Worker
Matches(int * status) const236*993b0882SAndroid Build Coastguard Worker bool UniLibBase::RegexMatcher::Matches(int* status) const {
237*993b0882SAndroid Build Coastguard Worker if (jni_cache_) {
238*993b0882SAndroid Build Coastguard Worker *status = kNoError;
239*993b0882SAndroid Build Coastguard Worker const bool result = jni_cache_->GetEnv()->CallBooleanMethod(
240*993b0882SAndroid Build Coastguard Worker matcher_.get(), jni_cache_->matcher_matches);
241*993b0882SAndroid Build Coastguard Worker if (jni_cache_->ExceptionCheckAndClear()) {
242*993b0882SAndroid Build Coastguard Worker *status = kError;
243*993b0882SAndroid Build Coastguard Worker return false;
244*993b0882SAndroid Build Coastguard Worker }
245*993b0882SAndroid Build Coastguard Worker return result;
246*993b0882SAndroid Build Coastguard Worker } else {
247*993b0882SAndroid Build Coastguard Worker *status = kError;
248*993b0882SAndroid Build Coastguard Worker return false;
249*993b0882SAndroid Build Coastguard Worker }
250*993b0882SAndroid Build Coastguard Worker }
251*993b0882SAndroid Build Coastguard Worker
ApproximatelyMatches(int * status)252*993b0882SAndroid Build Coastguard Worker bool UniLibBase::RegexMatcher::ApproximatelyMatches(int* status) {
253*993b0882SAndroid Build Coastguard Worker *status = kNoError;
254*993b0882SAndroid Build Coastguard Worker
255*993b0882SAndroid Build Coastguard Worker jni_cache_->GetEnv()->CallObjectMethod(matcher_.get(),
256*993b0882SAndroid Build Coastguard Worker jni_cache_->matcher_reset);
257*993b0882SAndroid Build Coastguard Worker if (jni_cache_->ExceptionCheckAndClear()) {
258*993b0882SAndroid Build Coastguard Worker *status = kError;
259*993b0882SAndroid Build Coastguard Worker return kError;
260*993b0882SAndroid Build Coastguard Worker }
261*993b0882SAndroid Build Coastguard Worker
262*993b0882SAndroid Build Coastguard Worker if (!Find(status) || *status != kNoError) {
263*993b0882SAndroid Build Coastguard Worker return false;
264*993b0882SAndroid Build Coastguard Worker }
265*993b0882SAndroid Build Coastguard Worker
266*993b0882SAndroid Build Coastguard Worker const int found_start = jni_cache_->GetEnv()->CallIntMethod(
267*993b0882SAndroid Build Coastguard Worker matcher_.get(), jni_cache_->matcher_start_idx, 0);
268*993b0882SAndroid Build Coastguard Worker if (jni_cache_->ExceptionCheckAndClear()) {
269*993b0882SAndroid Build Coastguard Worker *status = kError;
270*993b0882SAndroid Build Coastguard Worker return kError;
271*993b0882SAndroid Build Coastguard Worker }
272*993b0882SAndroid Build Coastguard Worker
273*993b0882SAndroid Build Coastguard Worker const int found_end = jni_cache_->GetEnv()->CallIntMethod(
274*993b0882SAndroid Build Coastguard Worker matcher_.get(), jni_cache_->matcher_end_idx, 0);
275*993b0882SAndroid Build Coastguard Worker if (jni_cache_->ExceptionCheckAndClear()) {
276*993b0882SAndroid Build Coastguard Worker *status = kError;
277*993b0882SAndroid Build Coastguard Worker return kError;
278*993b0882SAndroid Build Coastguard Worker }
279*993b0882SAndroid Build Coastguard Worker
280*993b0882SAndroid Build Coastguard Worker int context_length_bmp = jni_cache_->GetEnv()->CallIntMethod(
281*993b0882SAndroid Build Coastguard Worker text_.get(), jni_cache_->string_length);
282*993b0882SAndroid Build Coastguard Worker if (jni_cache_->ExceptionCheckAndClear()) {
283*993b0882SAndroid Build Coastguard Worker *status = kError;
284*993b0882SAndroid Build Coastguard Worker return false;
285*993b0882SAndroid Build Coastguard Worker }
286*993b0882SAndroid Build Coastguard Worker
287*993b0882SAndroid Build Coastguard Worker if (found_start != 0 || found_end != context_length_bmp) {
288*993b0882SAndroid Build Coastguard Worker return false;
289*993b0882SAndroid Build Coastguard Worker }
290*993b0882SAndroid Build Coastguard Worker
291*993b0882SAndroid Build Coastguard Worker return true;
292*993b0882SAndroid Build Coastguard Worker }
293*993b0882SAndroid Build Coastguard Worker
UpdateLastFindOffset() const294*993b0882SAndroid Build Coastguard Worker bool UniLibBase::RegexMatcher::UpdateLastFindOffset() const {
295*993b0882SAndroid Build Coastguard Worker if (!last_find_offset_dirty_) {
296*993b0882SAndroid Build Coastguard Worker return true;
297*993b0882SAndroid Build Coastguard Worker }
298*993b0882SAndroid Build Coastguard Worker
299*993b0882SAndroid Build Coastguard Worker const int find_offset = jni_cache_->GetEnv()->CallIntMethod(
300*993b0882SAndroid Build Coastguard Worker matcher_.get(), jni_cache_->matcher_start_idx, 0);
301*993b0882SAndroid Build Coastguard Worker if (jni_cache_->ExceptionCheckAndClear()) {
302*993b0882SAndroid Build Coastguard Worker return false;
303*993b0882SAndroid Build Coastguard Worker }
304*993b0882SAndroid Build Coastguard Worker
305*993b0882SAndroid Build Coastguard Worker const int codepoint_count = jni_cache_->GetEnv()->CallIntMethod(
306*993b0882SAndroid Build Coastguard Worker text_.get(), jni_cache_->string_code_point_count, last_find_offset_,
307*993b0882SAndroid Build Coastguard Worker find_offset);
308*993b0882SAndroid Build Coastguard Worker if (jni_cache_->ExceptionCheckAndClear()) {
309*993b0882SAndroid Build Coastguard Worker return false;
310*993b0882SAndroid Build Coastguard Worker }
311*993b0882SAndroid Build Coastguard Worker
312*993b0882SAndroid Build Coastguard Worker last_find_offset_codepoints_ += codepoint_count;
313*993b0882SAndroid Build Coastguard Worker last_find_offset_ = find_offset;
314*993b0882SAndroid Build Coastguard Worker last_find_offset_dirty_ = false;
315*993b0882SAndroid Build Coastguard Worker
316*993b0882SAndroid Build Coastguard Worker return true;
317*993b0882SAndroid Build Coastguard Worker }
318*993b0882SAndroid Build Coastguard Worker
Find(int * status)319*993b0882SAndroid Build Coastguard Worker bool UniLibBase::RegexMatcher::Find(int* status) {
320*993b0882SAndroid Build Coastguard Worker if (jni_cache_) {
321*993b0882SAndroid Build Coastguard Worker const bool result = jni_cache_->GetEnv()->CallBooleanMethod(
322*993b0882SAndroid Build Coastguard Worker matcher_.get(), jni_cache_->matcher_find);
323*993b0882SAndroid Build Coastguard Worker if (jni_cache_->ExceptionCheckAndClear()) {
324*993b0882SAndroid Build Coastguard Worker *status = kError;
325*993b0882SAndroid Build Coastguard Worker return false;
326*993b0882SAndroid Build Coastguard Worker }
327*993b0882SAndroid Build Coastguard Worker
328*993b0882SAndroid Build Coastguard Worker last_find_offset_dirty_ = true;
329*993b0882SAndroid Build Coastguard Worker *status = kNoError;
330*993b0882SAndroid Build Coastguard Worker return result;
331*993b0882SAndroid Build Coastguard Worker } else {
332*993b0882SAndroid Build Coastguard Worker *status = kError;
333*993b0882SAndroid Build Coastguard Worker return false;
334*993b0882SAndroid Build Coastguard Worker }
335*993b0882SAndroid Build Coastguard Worker }
336*993b0882SAndroid Build Coastguard Worker
Start(int * status) const337*993b0882SAndroid Build Coastguard Worker int UniLibBase::RegexMatcher::Start(int* status) const {
338*993b0882SAndroid Build Coastguard Worker return Start(/*group_idx=*/0, status);
339*993b0882SAndroid Build Coastguard Worker }
340*993b0882SAndroid Build Coastguard Worker
Start(int group_idx,int * status) const341*993b0882SAndroid Build Coastguard Worker int UniLibBase::RegexMatcher::Start(int group_idx, int* status) const {
342*993b0882SAndroid Build Coastguard Worker if (jni_cache_) {
343*993b0882SAndroid Build Coastguard Worker *status = kNoError;
344*993b0882SAndroid Build Coastguard Worker
345*993b0882SAndroid Build Coastguard Worker if (!UpdateLastFindOffset()) {
346*993b0882SAndroid Build Coastguard Worker *status = kError;
347*993b0882SAndroid Build Coastguard Worker return kError;
348*993b0882SAndroid Build Coastguard Worker }
349*993b0882SAndroid Build Coastguard Worker
350*993b0882SAndroid Build Coastguard Worker const int java_index = jni_cache_->GetEnv()->CallIntMethod(
351*993b0882SAndroid Build Coastguard Worker matcher_.get(), jni_cache_->matcher_start_idx, group_idx);
352*993b0882SAndroid Build Coastguard Worker if (jni_cache_->ExceptionCheckAndClear()) {
353*993b0882SAndroid Build Coastguard Worker *status = kError;
354*993b0882SAndroid Build Coastguard Worker return kError;
355*993b0882SAndroid Build Coastguard Worker }
356*993b0882SAndroid Build Coastguard Worker
357*993b0882SAndroid Build Coastguard Worker // If the group didn't participate in the match the index is -1.
358*993b0882SAndroid Build Coastguard Worker if (java_index == -1) {
359*993b0882SAndroid Build Coastguard Worker return -1;
360*993b0882SAndroid Build Coastguard Worker }
361*993b0882SAndroid Build Coastguard Worker
362*993b0882SAndroid Build Coastguard Worker const int unicode_index = jni_cache_->GetEnv()->CallIntMethod(
363*993b0882SAndroid Build Coastguard Worker text_.get(), jni_cache_->string_code_point_count, last_find_offset_,
364*993b0882SAndroid Build Coastguard Worker java_index);
365*993b0882SAndroid Build Coastguard Worker if (jni_cache_->ExceptionCheckAndClear()) {
366*993b0882SAndroid Build Coastguard Worker *status = kError;
367*993b0882SAndroid Build Coastguard Worker return kError;
368*993b0882SAndroid Build Coastguard Worker }
369*993b0882SAndroid Build Coastguard Worker
370*993b0882SAndroid Build Coastguard Worker return unicode_index + last_find_offset_codepoints_;
371*993b0882SAndroid Build Coastguard Worker } else {
372*993b0882SAndroid Build Coastguard Worker *status = kError;
373*993b0882SAndroid Build Coastguard Worker return kError;
374*993b0882SAndroid Build Coastguard Worker }
375*993b0882SAndroid Build Coastguard Worker }
376*993b0882SAndroid Build Coastguard Worker
End(int * status) const377*993b0882SAndroid Build Coastguard Worker int UniLibBase::RegexMatcher::End(int* status) const {
378*993b0882SAndroid Build Coastguard Worker return End(/*group_idx=*/0, status);
379*993b0882SAndroid Build Coastguard Worker }
380*993b0882SAndroid Build Coastguard Worker
End(int group_idx,int * status) const381*993b0882SAndroid Build Coastguard Worker int UniLibBase::RegexMatcher::End(int group_idx, int* status) const {
382*993b0882SAndroid Build Coastguard Worker if (jni_cache_) {
383*993b0882SAndroid Build Coastguard Worker *status = kNoError;
384*993b0882SAndroid Build Coastguard Worker
385*993b0882SAndroid Build Coastguard Worker if (!UpdateLastFindOffset()) {
386*993b0882SAndroid Build Coastguard Worker *status = kError;
387*993b0882SAndroid Build Coastguard Worker return kError;
388*993b0882SAndroid Build Coastguard Worker }
389*993b0882SAndroid Build Coastguard Worker
390*993b0882SAndroid Build Coastguard Worker const int java_index = jni_cache_->GetEnv()->CallIntMethod(
391*993b0882SAndroid Build Coastguard Worker matcher_.get(), jni_cache_->matcher_end_idx, group_idx);
392*993b0882SAndroid Build Coastguard Worker if (jni_cache_->ExceptionCheckAndClear()) {
393*993b0882SAndroid Build Coastguard Worker *status = kError;
394*993b0882SAndroid Build Coastguard Worker return kError;
395*993b0882SAndroid Build Coastguard Worker }
396*993b0882SAndroid Build Coastguard Worker
397*993b0882SAndroid Build Coastguard Worker // If the group didn't participate in the match the index is -1.
398*993b0882SAndroid Build Coastguard Worker if (java_index == -1) {
399*993b0882SAndroid Build Coastguard Worker return -1;
400*993b0882SAndroid Build Coastguard Worker }
401*993b0882SAndroid Build Coastguard Worker
402*993b0882SAndroid Build Coastguard Worker const int unicode_index = jni_cache_->GetEnv()->CallIntMethod(
403*993b0882SAndroid Build Coastguard Worker text_.get(), jni_cache_->string_code_point_count, last_find_offset_,
404*993b0882SAndroid Build Coastguard Worker java_index);
405*993b0882SAndroid Build Coastguard Worker if (jni_cache_->ExceptionCheckAndClear()) {
406*993b0882SAndroid Build Coastguard Worker *status = kError;
407*993b0882SAndroid Build Coastguard Worker return kError;
408*993b0882SAndroid Build Coastguard Worker }
409*993b0882SAndroid Build Coastguard Worker
410*993b0882SAndroid Build Coastguard Worker return unicode_index + last_find_offset_codepoints_;
411*993b0882SAndroid Build Coastguard Worker } else {
412*993b0882SAndroid Build Coastguard Worker *status = kError;
413*993b0882SAndroid Build Coastguard Worker return kError;
414*993b0882SAndroid Build Coastguard Worker }
415*993b0882SAndroid Build Coastguard Worker }
416*993b0882SAndroid Build Coastguard Worker
Group(int * status) const417*993b0882SAndroid Build Coastguard Worker UnicodeText UniLibBase::RegexMatcher::Group(int* status) const {
418*993b0882SAndroid Build Coastguard Worker if (jni_cache_) {
419*993b0882SAndroid Build Coastguard Worker JNIEnv* jenv = jni_cache_->GetEnv();
420*993b0882SAndroid Build Coastguard Worker StatusOr<ScopedLocalRef<jstring>> status_or_java_result =
421*993b0882SAndroid Build Coastguard Worker JniHelper::CallObjectMethod<jstring>(jenv, matcher_.get(),
422*993b0882SAndroid Build Coastguard Worker jni_cache_->matcher_group);
423*993b0882SAndroid Build Coastguard Worker
424*993b0882SAndroid Build Coastguard Worker if (jni_cache_->ExceptionCheckAndClear() || !status_or_java_result.ok() ||
425*993b0882SAndroid Build Coastguard Worker !status_or_java_result.ValueOrDie()) {
426*993b0882SAndroid Build Coastguard Worker *status = kError;
427*993b0882SAndroid Build Coastguard Worker return UTF8ToUnicodeText("", /*do_copy=*/false);
428*993b0882SAndroid Build Coastguard Worker }
429*993b0882SAndroid Build Coastguard Worker
430*993b0882SAndroid Build Coastguard Worker StatusOr<std::string> status_or_result =
431*993b0882SAndroid Build Coastguard Worker JStringToUtf8String(jenv, status_or_java_result.ValueOrDie().get());
432*993b0882SAndroid Build Coastguard Worker if (!status_or_result.ok()) {
433*993b0882SAndroid Build Coastguard Worker *status = kError;
434*993b0882SAndroid Build Coastguard Worker return UTF8ToUnicodeText("", /*do_copy=*/false);
435*993b0882SAndroid Build Coastguard Worker }
436*993b0882SAndroid Build Coastguard Worker *status = kNoError;
437*993b0882SAndroid Build Coastguard Worker return UTF8ToUnicodeText(status_or_result.ValueOrDie(), /*do_copy=*/true);
438*993b0882SAndroid Build Coastguard Worker } else {
439*993b0882SAndroid Build Coastguard Worker *status = kError;
440*993b0882SAndroid Build Coastguard Worker return UTF8ToUnicodeText("", /*do_copy=*/false);
441*993b0882SAndroid Build Coastguard Worker }
442*993b0882SAndroid Build Coastguard Worker }
443*993b0882SAndroid Build Coastguard Worker
Group(int group_idx,int * status) const444*993b0882SAndroid Build Coastguard Worker UnicodeText UniLibBase::RegexMatcher::Group(int group_idx, int* status) const {
445*993b0882SAndroid Build Coastguard Worker if (jni_cache_) {
446*993b0882SAndroid Build Coastguard Worker JNIEnv* jenv = jni_cache_->GetEnv();
447*993b0882SAndroid Build Coastguard Worker
448*993b0882SAndroid Build Coastguard Worker StatusOr<ScopedLocalRef<jstring>> status_or_java_result =
449*993b0882SAndroid Build Coastguard Worker JniHelper::CallObjectMethod<jstring>(
450*993b0882SAndroid Build Coastguard Worker jenv, matcher_.get(), jni_cache_->matcher_group_idx, group_idx);
451*993b0882SAndroid Build Coastguard Worker if (jni_cache_->ExceptionCheckAndClear() || !status_or_java_result.ok()) {
452*993b0882SAndroid Build Coastguard Worker *status = kError;
453*993b0882SAndroid Build Coastguard Worker TC3_LOG(ERROR) << "Exception occurred";
454*993b0882SAndroid Build Coastguard Worker return UTF8ToUnicodeText("", /*do_copy=*/false);
455*993b0882SAndroid Build Coastguard Worker }
456*993b0882SAndroid Build Coastguard Worker
457*993b0882SAndroid Build Coastguard Worker // java_result is nullptr when the group did not participate in the match.
458*993b0882SAndroid Build Coastguard Worker // For these cases other UniLib implementations return empty string, and
459*993b0882SAndroid Build Coastguard Worker // the participation can be checked by checking if Start() == -1.
460*993b0882SAndroid Build Coastguard Worker if (!status_or_java_result.ValueOrDie()) {
461*993b0882SAndroid Build Coastguard Worker *status = kNoError;
462*993b0882SAndroid Build Coastguard Worker return UTF8ToUnicodeText("", /*do_copy=*/false);
463*993b0882SAndroid Build Coastguard Worker }
464*993b0882SAndroid Build Coastguard Worker
465*993b0882SAndroid Build Coastguard Worker StatusOr<std::string> status_or_result =
466*993b0882SAndroid Build Coastguard Worker JStringToUtf8String(jenv, status_or_java_result.ValueOrDie().get());
467*993b0882SAndroid Build Coastguard Worker if (!status_or_result.ok()) {
468*993b0882SAndroid Build Coastguard Worker *status = kError;
469*993b0882SAndroid Build Coastguard Worker return UTF8ToUnicodeText("", /*do_copy=*/false);
470*993b0882SAndroid Build Coastguard Worker }
471*993b0882SAndroid Build Coastguard Worker *status = kNoError;
472*993b0882SAndroid Build Coastguard Worker return UTF8ToUnicodeText(status_or_result.ValueOrDie(), /*do_copy=*/true);
473*993b0882SAndroid Build Coastguard Worker } else {
474*993b0882SAndroid Build Coastguard Worker *status = kError;
475*993b0882SAndroid Build Coastguard Worker return UTF8ToUnicodeText("", /*do_copy=*/false);
476*993b0882SAndroid Build Coastguard Worker }
477*993b0882SAndroid Build Coastguard Worker }
478*993b0882SAndroid Build Coastguard Worker
// Namespace-scope definition of the static constexpr member
// BreakIterator::kDone (its value is given at the in-class declaration, which
// is not in this file). Such a definition is what allows the constant to be
// ODR-used (e.g. bound to a reference) when building as pre-C++17.
479*993b0882SAndroid Build Coastguard Worker constexpr int UniLibBase::BreakIterator::kDone;
480*993b0882SAndroid Build Coastguard Worker
BreakIterator(const JniCache * jni_cache,const UnicodeText & text)481*993b0882SAndroid Build Coastguard Worker UniLibBase::BreakIterator::BreakIterator(const JniCache* jni_cache,
482*993b0882SAndroid Build Coastguard Worker const UnicodeText& text)
483*993b0882SAndroid Build Coastguard Worker : jni_cache_(jni_cache),
484*993b0882SAndroid Build Coastguard Worker text_(nullptr, jni_cache ? jni_cache->jvm : nullptr),
485*993b0882SAndroid Build Coastguard Worker iterator_(nullptr, jni_cache ? jni_cache->jvm : nullptr),
486*993b0882SAndroid Build Coastguard Worker last_break_index_(0),
487*993b0882SAndroid Build Coastguard Worker last_unicode_index_(0) {
488*993b0882SAndroid Build Coastguard Worker if (jni_cache_) {
489*993b0882SAndroid Build Coastguard Worker JNIEnv* jenv = jni_cache_->GetEnv();
490*993b0882SAndroid Build Coastguard Worker StatusOr<ScopedLocalRef<jstring>> status_or_text =
491*993b0882SAndroid Build Coastguard Worker jni_cache_->ConvertToJavaString(text);
492*993b0882SAndroid Build Coastguard Worker if (!status_or_text.ok()) {
493*993b0882SAndroid Build Coastguard Worker return;
494*993b0882SAndroid Build Coastguard Worker }
495*993b0882SAndroid Build Coastguard Worker text_ =
496*993b0882SAndroid Build Coastguard Worker MakeGlobalRef(status_or_text.ValueOrDie().get(), jenv, jni_cache->jvm);
497*993b0882SAndroid Build Coastguard Worker if (!text_) {
498*993b0882SAndroid Build Coastguard Worker return;
499*993b0882SAndroid Build Coastguard Worker }
500*993b0882SAndroid Build Coastguard Worker
501*993b0882SAndroid Build Coastguard Worker StatusOr<ScopedLocalRef<jobject>> status_or_iterator =
502*993b0882SAndroid Build Coastguard Worker JniHelper::CallStaticObjectMethod(
503*993b0882SAndroid Build Coastguard Worker jenv, jni_cache->breakiterator_class.get(),
504*993b0882SAndroid Build Coastguard Worker jni_cache->breakiterator_getwordinstance,
505*993b0882SAndroid Build Coastguard Worker jni_cache->locale_us.get());
506*993b0882SAndroid Build Coastguard Worker if (!status_or_iterator.ok()) {
507*993b0882SAndroid Build Coastguard Worker return;
508*993b0882SAndroid Build Coastguard Worker }
509*993b0882SAndroid Build Coastguard Worker iterator_ = MakeGlobalRef(status_or_iterator.ValueOrDie().get(), jenv,
510*993b0882SAndroid Build Coastguard Worker jni_cache->jvm);
511*993b0882SAndroid Build Coastguard Worker if (!iterator_) {
512*993b0882SAndroid Build Coastguard Worker return;
513*993b0882SAndroid Build Coastguard Worker }
514*993b0882SAndroid Build Coastguard Worker JniHelper::CallVoidMethod(jenv, iterator_.get(),
515*993b0882SAndroid Build Coastguard Worker jni_cache->breakiterator_settext, text_.get());
516*993b0882SAndroid Build Coastguard Worker }
517*993b0882SAndroid Build Coastguard Worker }
518*993b0882SAndroid Build Coastguard Worker
Next()519*993b0882SAndroid Build Coastguard Worker int UniLibBase::BreakIterator::Next() {
520*993b0882SAndroid Build Coastguard Worker if (jni_cache_) {
521*993b0882SAndroid Build Coastguard Worker const int break_index = jni_cache_->GetEnv()->CallIntMethod(
522*993b0882SAndroid Build Coastguard Worker iterator_.get(), jni_cache_->breakiterator_next);
523*993b0882SAndroid Build Coastguard Worker if (jni_cache_->ExceptionCheckAndClear() ||
524*993b0882SAndroid Build Coastguard Worker break_index == BreakIterator::kDone) {
525*993b0882SAndroid Build Coastguard Worker return BreakIterator::kDone;
526*993b0882SAndroid Build Coastguard Worker }
527*993b0882SAndroid Build Coastguard Worker
528*993b0882SAndroid Build Coastguard Worker const int token_unicode_length = jni_cache_->GetEnv()->CallIntMethod(
529*993b0882SAndroid Build Coastguard Worker text_.get(), jni_cache_->string_code_point_count, last_break_index_,
530*993b0882SAndroid Build Coastguard Worker break_index);
531*993b0882SAndroid Build Coastguard Worker if (jni_cache_->ExceptionCheckAndClear()) {
532*993b0882SAndroid Build Coastguard Worker return BreakIterator::kDone;
533*993b0882SAndroid Build Coastguard Worker }
534*993b0882SAndroid Build Coastguard Worker
535*993b0882SAndroid Build Coastguard Worker last_break_index_ = break_index;
536*993b0882SAndroid Build Coastguard Worker return last_unicode_index_ += token_unicode_length;
537*993b0882SAndroid Build Coastguard Worker }
538*993b0882SAndroid Build Coastguard Worker return BreakIterator::kDone;
539*993b0882SAndroid Build Coastguard Worker }
540*993b0882SAndroid Build Coastguard Worker
CreateBreakIterator(const UnicodeText & text) const541*993b0882SAndroid Build Coastguard Worker std::unique_ptr<UniLibBase::BreakIterator> UniLibBase::CreateBreakIterator(
542*993b0882SAndroid Build Coastguard Worker const UnicodeText& text) const {
543*993b0882SAndroid Build Coastguard Worker return std::unique_ptr<UniLibBase::BreakIterator>(
544*993b0882SAndroid Build Coastguard Worker new UniLibBase::BreakIterator(jni_cache_.get(), text));
545*993b0882SAndroid Build Coastguard Worker }
546*993b0882SAndroid Build Coastguard Worker
547*993b0882SAndroid Build Coastguard Worker } // namespace libtextclassifier3
548