xref: /aosp_15_r20/external/icu/icu4c/source/common/lstmbe.cpp (revision 0e209d3975ff4a8c132096b14b0e9364a753506e)
1*0e209d39SAndroid Build Coastguard Worker // © 2021 and later: Unicode, Inc. and others.
2*0e209d39SAndroid Build Coastguard Worker // License & terms of use: http://www.unicode.org/copyright.html
3*0e209d39SAndroid Build Coastguard Worker 
4*0e209d39SAndroid Build Coastguard Worker #include <complex>
5*0e209d39SAndroid Build Coastguard Worker #include <utility>
6*0e209d39SAndroid Build Coastguard Worker 
7*0e209d39SAndroid Build Coastguard Worker #include "unicode/utypes.h"
8*0e209d39SAndroid Build Coastguard Worker 
9*0e209d39SAndroid Build Coastguard Worker #if !UCONFIG_NO_BREAK_ITERATION
10*0e209d39SAndroid Build Coastguard Worker 
11*0e209d39SAndroid Build Coastguard Worker #include "brkeng.h"
12*0e209d39SAndroid Build Coastguard Worker #include "charstr.h"
13*0e209d39SAndroid Build Coastguard Worker #include "cmemory.h"
14*0e209d39SAndroid Build Coastguard Worker #include "lstmbe.h"
15*0e209d39SAndroid Build Coastguard Worker #include "putilimp.h"
16*0e209d39SAndroid Build Coastguard Worker #include "uassert.h"
17*0e209d39SAndroid Build Coastguard Worker #include "ubrkimpl.h"
18*0e209d39SAndroid Build Coastguard Worker #include "uresimp.h"
19*0e209d39SAndroid Build Coastguard Worker #include "uvectr32.h"
20*0e209d39SAndroid Build Coastguard Worker #include "uvector.h"
21*0e209d39SAndroid Build Coastguard Worker 
22*0e209d39SAndroid Build Coastguard Worker #include "unicode/brkiter.h"
23*0e209d39SAndroid Build Coastguard Worker #include "unicode/resbund.h"
24*0e209d39SAndroid Build Coastguard Worker #include "unicode/ubrk.h"
25*0e209d39SAndroid Build Coastguard Worker #include "unicode/uniset.h"
26*0e209d39SAndroid Build Coastguard Worker #include "unicode/ustring.h"
27*0e209d39SAndroid Build Coastguard Worker #include "unicode/utf.h"
28*0e209d39SAndroid Build Coastguard Worker 
29*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_BEGIN
30*0e209d39SAndroid Build Coastguard Worker 
31*0e209d39SAndroid Build Coastguard Worker // Uncomment the following #define to debug.
32*0e209d39SAndroid Build Coastguard Worker // #define LSTM_DEBUG 1
33*0e209d39SAndroid Build Coastguard Worker // #define LSTM_VECTORIZER_DEBUG 1
34*0e209d39SAndroid Build Coastguard Worker 
/**
 * Read-only view of a one-dimensional array of floats.
 *
 * Implementations report their length through d1() and hand out individual
 * elements through get().
 */
class ReadArray1D {
public:
    virtual ~ReadArray1D();

    /** Number of elements in the array. */
    virtual int32_t d1() const = 0;

    /** Value of the element stored at index i. */
    virtual float get(int32_t i) const = 0;

#ifdef LSTM_DEBUG
    /** Debug helper: writes every element to stdout, four values per line. */
    void print() const {
        printf("\n[");
        int32_t pos = 0;
        while (pos < d1()) {
            printf("%0.8e ", get(pos));
            ++pos;
            // Break the line after every fourth value.
            if (pos % 4 == 0) {
                printf("\n");
            }
        }
        printf("]\n");
    }
#endif
};

// The destructor is defined outside the class body.
ReadArray1D::~ReadArray1D() = default;
59*0e209d39SAndroid Build Coastguard Worker 
/**
 * Read-only view of a two-dimensional array of floats.
 *
 * d1() and d2() report the size of the first and second dimension; get(i, j)
 * returns the element addressed by index i in the first dimension and index j
 * in the second.
 */
class ReadArray2D {
public:
    virtual ~ReadArray2D();

    /** Size of the first dimension. */
    virtual int32_t d1() const = 0;

    /** Size of the second dimension. */
    virtual int32_t d2() const = 0;

    /** Value of the element at position (i, j). */
    virtual float get(int32_t i, int32_t j) const = 0;
};

// The destructor is defined outside the class body.
ReadArray2D::~ReadArray2D() = default;
74*0e209d39SAndroid Build Coastguard Worker 
75*0e209d39SAndroid Build Coastguard Worker /**
76*0e209d39SAndroid Build Coastguard Worker  * A class to index a float array as a 1D Array without owning the pointer or
77*0e209d39SAndroid Build Coastguard Worker  * copy the data.
78*0e209d39SAndroid Build Coastguard Worker  */
79*0e209d39SAndroid Build Coastguard Worker class ConstArray1D : public ReadArray1D {
80*0e209d39SAndroid Build Coastguard Worker public:
ConstArray1D()81*0e209d39SAndroid Build Coastguard Worker     ConstArray1D() : data_(nullptr), d1_(0) {}
82*0e209d39SAndroid Build Coastguard Worker 
ConstArray1D(const float * data,int32_t d1)83*0e209d39SAndroid Build Coastguard Worker     ConstArray1D(const float* data, int32_t d1) : data_(data), d1_(d1) {}
84*0e209d39SAndroid Build Coastguard Worker 
85*0e209d39SAndroid Build Coastguard Worker     virtual ~ConstArray1D();
86*0e209d39SAndroid Build Coastguard Worker 
87*0e209d39SAndroid Build Coastguard Worker     // Init the object, the object does not own the data nor copy.
88*0e209d39SAndroid Build Coastguard Worker     // It is designed to directly use data from memory mapped resources.
init(const int32_t * data,int32_t d1)89*0e209d39SAndroid Build Coastguard Worker     void init(const int32_t* data, int32_t d1) {
90*0e209d39SAndroid Build Coastguard Worker         U_ASSERT(IEEE_754 == 1);
91*0e209d39SAndroid Build Coastguard Worker         data_ = reinterpret_cast<const float*>(data);
92*0e209d39SAndroid Build Coastguard Worker         d1_ = d1;
93*0e209d39SAndroid Build Coastguard Worker     }
94*0e209d39SAndroid Build Coastguard Worker 
95*0e209d39SAndroid Build Coastguard Worker     // ReadArray1D methods.
d1() const96*0e209d39SAndroid Build Coastguard Worker     virtual int32_t d1() const override { return d1_; }
get(int32_t i) const97*0e209d39SAndroid Build Coastguard Worker     virtual float get(int32_t i) const override {
98*0e209d39SAndroid Build Coastguard Worker         U_ASSERT(i < d1_);
99*0e209d39SAndroid Build Coastguard Worker         return data_[i];
100*0e209d39SAndroid Build Coastguard Worker     }
101*0e209d39SAndroid Build Coastguard Worker 
102*0e209d39SAndroid Build Coastguard Worker private:
103*0e209d39SAndroid Build Coastguard Worker     const float* data_;
104*0e209d39SAndroid Build Coastguard Worker     int32_t d1_;
105*0e209d39SAndroid Build Coastguard Worker };
106*0e209d39SAndroid Build Coastguard Worker 
~ConstArray1D()107*0e209d39SAndroid Build Coastguard Worker ConstArray1D::~ConstArray1D()
108*0e209d39SAndroid Build Coastguard Worker {
109*0e209d39SAndroid Build Coastguard Worker }
110*0e209d39SAndroid Build Coastguard Worker 
111*0e209d39SAndroid Build Coastguard Worker /**
112*0e209d39SAndroid Build Coastguard Worker  * A class to index a float array as a 2D Array without owning the pointer or
113*0e209d39SAndroid Build Coastguard Worker  * copy the data.
114*0e209d39SAndroid Build Coastguard Worker  */
115*0e209d39SAndroid Build Coastguard Worker class ConstArray2D : public ReadArray2D {
116*0e209d39SAndroid Build Coastguard Worker public:
ConstArray2D()117*0e209d39SAndroid Build Coastguard Worker     ConstArray2D() : data_(nullptr), d1_(0), d2_(0) {}
118*0e209d39SAndroid Build Coastguard Worker 
ConstArray2D(const float * data,int32_t d1,int32_t d2)119*0e209d39SAndroid Build Coastguard Worker     ConstArray2D(const float* data, int32_t d1, int32_t d2)
120*0e209d39SAndroid Build Coastguard Worker         : data_(data), d1_(d1), d2_(d2) {}
121*0e209d39SAndroid Build Coastguard Worker 
122*0e209d39SAndroid Build Coastguard Worker     virtual ~ConstArray2D();
123*0e209d39SAndroid Build Coastguard Worker 
124*0e209d39SAndroid Build Coastguard Worker     // Init the object, the object does not own the data nor copy.
125*0e209d39SAndroid Build Coastguard Worker     // It is designed to directly use data from memory mapped resources.
init(const int32_t * data,int32_t d1,int32_t d2)126*0e209d39SAndroid Build Coastguard Worker     void init(const int32_t* data, int32_t d1, int32_t d2) {
127*0e209d39SAndroid Build Coastguard Worker         U_ASSERT(IEEE_754 == 1);
128*0e209d39SAndroid Build Coastguard Worker         data_ = reinterpret_cast<const float*>(data);
129*0e209d39SAndroid Build Coastguard Worker         d1_ = d1;
130*0e209d39SAndroid Build Coastguard Worker         d2_ = d2;
131*0e209d39SAndroid Build Coastguard Worker     }
132*0e209d39SAndroid Build Coastguard Worker 
133*0e209d39SAndroid Build Coastguard Worker     // ReadArray2D methods.
d1() const134*0e209d39SAndroid Build Coastguard Worker     inline int32_t d1() const override { return d1_; }
d2() const135*0e209d39SAndroid Build Coastguard Worker     inline int32_t d2() const override { return d2_; }
get(int32_t i,int32_t j) const136*0e209d39SAndroid Build Coastguard Worker     float get(int32_t i, int32_t j) const override {
137*0e209d39SAndroid Build Coastguard Worker         U_ASSERT(i < d1_);
138*0e209d39SAndroid Build Coastguard Worker         U_ASSERT(j < d2_);
139*0e209d39SAndroid Build Coastguard Worker         return data_[i * d2_ + j];
140*0e209d39SAndroid Build Coastguard Worker     }
141*0e209d39SAndroid Build Coastguard Worker 
142*0e209d39SAndroid Build Coastguard Worker     // Expose the ith row as a ConstArray1D
row(int32_t i) const143*0e209d39SAndroid Build Coastguard Worker     inline ConstArray1D row(int32_t i) const {
144*0e209d39SAndroid Build Coastguard Worker         U_ASSERT(i < d1_);
145*0e209d39SAndroid Build Coastguard Worker         return ConstArray1D(data_ + i * d2_, d2_);
146*0e209d39SAndroid Build Coastguard Worker     }
147*0e209d39SAndroid Build Coastguard Worker 
148*0e209d39SAndroid Build Coastguard Worker private:
149*0e209d39SAndroid Build Coastguard Worker     const float* data_;
150*0e209d39SAndroid Build Coastguard Worker     int32_t d1_;
151*0e209d39SAndroid Build Coastguard Worker     int32_t d2_;
152*0e209d39SAndroid Build Coastguard Worker };
153*0e209d39SAndroid Build Coastguard Worker 
~ConstArray2D()154*0e209d39SAndroid Build Coastguard Worker ConstArray2D::~ConstArray2D()
155*0e209d39SAndroid Build Coastguard Worker {
156*0e209d39SAndroid Build Coastguard Worker }
157*0e209d39SAndroid Build Coastguard Worker 
158*0e209d39SAndroid Build Coastguard Worker /**
159*0e209d39SAndroid Build Coastguard Worker  * A class to allocate data as a writable 1D array.
160*0e209d39SAndroid Build Coastguard Worker  * This is the main class implement matrix operation.
161*0e209d39SAndroid Build Coastguard Worker  */
162*0e209d39SAndroid Build Coastguard Worker class Array1D : public ReadArray1D {
163*0e209d39SAndroid Build Coastguard Worker public:
Array1D()164*0e209d39SAndroid Build Coastguard Worker     Array1D() : memory_(nullptr), data_(nullptr), d1_(0) {}
Array1D(int32_t d1,UErrorCode & status)165*0e209d39SAndroid Build Coastguard Worker     Array1D(int32_t d1, UErrorCode &status)
166*0e209d39SAndroid Build Coastguard Worker         : memory_(uprv_malloc(d1 * sizeof(float))),
167*0e209d39SAndroid Build Coastguard Worker           data_((float*)memory_), d1_(d1) {
168*0e209d39SAndroid Build Coastguard Worker         if (U_SUCCESS(status)) {
169*0e209d39SAndroid Build Coastguard Worker             if (memory_ == nullptr) {
170*0e209d39SAndroid Build Coastguard Worker                 status = U_MEMORY_ALLOCATION_ERROR;
171*0e209d39SAndroid Build Coastguard Worker                 return;
172*0e209d39SAndroid Build Coastguard Worker             }
173*0e209d39SAndroid Build Coastguard Worker             clear();
174*0e209d39SAndroid Build Coastguard Worker         }
175*0e209d39SAndroid Build Coastguard Worker     }
176*0e209d39SAndroid Build Coastguard Worker 
177*0e209d39SAndroid Build Coastguard Worker     virtual ~Array1D();
178*0e209d39SAndroid Build Coastguard Worker 
179*0e209d39SAndroid Build Coastguard Worker     // A special constructor which does not own the memory but writeable
180*0e209d39SAndroid Build Coastguard Worker     // as a slice of an array.
Array1D(float * data,int32_t d1)181*0e209d39SAndroid Build Coastguard Worker     Array1D(float* data, int32_t d1)
182*0e209d39SAndroid Build Coastguard Worker         : memory_(nullptr), data_(data), d1_(d1) {}
183*0e209d39SAndroid Build Coastguard Worker 
184*0e209d39SAndroid Build Coastguard Worker     // ReadArray1D methods.
d1() const185*0e209d39SAndroid Build Coastguard Worker     virtual int32_t d1() const override { return d1_; }
get(int32_t i) const186*0e209d39SAndroid Build Coastguard Worker     virtual float get(int32_t i) const override {
187*0e209d39SAndroid Build Coastguard Worker         U_ASSERT(i < d1_);
188*0e209d39SAndroid Build Coastguard Worker         return data_[i];
189*0e209d39SAndroid Build Coastguard Worker     }
190*0e209d39SAndroid Build Coastguard Worker 
191*0e209d39SAndroid Build Coastguard Worker     // Return the index which point to the max data in the array.
maxIndex() const192*0e209d39SAndroid Build Coastguard Worker     inline int32_t maxIndex() const {
193*0e209d39SAndroid Build Coastguard Worker         int32_t index = 0;
194*0e209d39SAndroid Build Coastguard Worker         float max = data_[0];
195*0e209d39SAndroid Build Coastguard Worker         for (int32_t i = 1; i < d1_; i++) {
196*0e209d39SAndroid Build Coastguard Worker             if (data_[i] > max) {
197*0e209d39SAndroid Build Coastguard Worker                 max = data_[i];
198*0e209d39SAndroid Build Coastguard Worker                 index = i;
199*0e209d39SAndroid Build Coastguard Worker             }
200*0e209d39SAndroid Build Coastguard Worker         }
201*0e209d39SAndroid Build Coastguard Worker         return index;
202*0e209d39SAndroid Build Coastguard Worker     }
203*0e209d39SAndroid Build Coastguard Worker 
204*0e209d39SAndroid Build Coastguard Worker     // Slice part of the array to a new one.
slice(int32_t from,int32_t size) const205*0e209d39SAndroid Build Coastguard Worker     inline Array1D slice(int32_t from, int32_t size) const {
206*0e209d39SAndroid Build Coastguard Worker         U_ASSERT(from >= 0);
207*0e209d39SAndroid Build Coastguard Worker         U_ASSERT(from < d1_);
208*0e209d39SAndroid Build Coastguard Worker         U_ASSERT(from + size <= d1_);
209*0e209d39SAndroid Build Coastguard Worker         return Array1D(data_ + from, size);
210*0e209d39SAndroid Build Coastguard Worker     }
211*0e209d39SAndroid Build Coastguard Worker 
212*0e209d39SAndroid Build Coastguard Worker     // Add dot product of a 1D array and a 2D array into this one.
addDotProduct(const ReadArray1D & a,const ReadArray2D & b)213*0e209d39SAndroid Build Coastguard Worker     inline Array1D& addDotProduct(const ReadArray1D& a, const ReadArray2D& b) {
214*0e209d39SAndroid Build Coastguard Worker         U_ASSERT(a.d1() == b.d1());
215*0e209d39SAndroid Build Coastguard Worker         U_ASSERT(b.d2() == d1());
216*0e209d39SAndroid Build Coastguard Worker         for (int32_t i = 0; i < d1(); i++) {
217*0e209d39SAndroid Build Coastguard Worker             for (int32_t j = 0; j < a.d1(); j++) {
218*0e209d39SAndroid Build Coastguard Worker                 data_[i] += a.get(j) * b.get(j, i);
219*0e209d39SAndroid Build Coastguard Worker             }
220*0e209d39SAndroid Build Coastguard Worker         }
221*0e209d39SAndroid Build Coastguard Worker         return *this;
222*0e209d39SAndroid Build Coastguard Worker     }
223*0e209d39SAndroid Build Coastguard Worker 
224*0e209d39SAndroid Build Coastguard Worker     // Hadamard Product the values of another array of the same size into this one.
hadamardProduct(const ReadArray1D & a)225*0e209d39SAndroid Build Coastguard Worker     inline Array1D& hadamardProduct(const ReadArray1D& a) {
226*0e209d39SAndroid Build Coastguard Worker         U_ASSERT(a.d1() == d1());
227*0e209d39SAndroid Build Coastguard Worker         for (int32_t i = 0; i < d1(); i++) {
228*0e209d39SAndroid Build Coastguard Worker             data_[i] *= a.get(i);
229*0e209d39SAndroid Build Coastguard Worker         }
230*0e209d39SAndroid Build Coastguard Worker         return *this;
231*0e209d39SAndroid Build Coastguard Worker     }
232*0e209d39SAndroid Build Coastguard Worker 
233*0e209d39SAndroid Build Coastguard Worker     // Add the Hadamard Product of two arrays of the same size into this one.
addHadamardProduct(const ReadArray1D & a,const ReadArray1D & b)234*0e209d39SAndroid Build Coastguard Worker     inline Array1D& addHadamardProduct(const ReadArray1D& a, const ReadArray1D& b) {
235*0e209d39SAndroid Build Coastguard Worker         U_ASSERT(a.d1() == d1());
236*0e209d39SAndroid Build Coastguard Worker         U_ASSERT(b.d1() == d1());
237*0e209d39SAndroid Build Coastguard Worker         for (int32_t i = 0; i < d1(); i++) {
238*0e209d39SAndroid Build Coastguard Worker             data_[i] += a.get(i) * b.get(i);
239*0e209d39SAndroid Build Coastguard Worker         }
240*0e209d39SAndroid Build Coastguard Worker         return *this;
241*0e209d39SAndroid Build Coastguard Worker     }
242*0e209d39SAndroid Build Coastguard Worker 
243*0e209d39SAndroid Build Coastguard Worker     // Add the values of another array of the same size into this one.
add(const ReadArray1D & a)244*0e209d39SAndroid Build Coastguard Worker     inline Array1D& add(const ReadArray1D& a) {
245*0e209d39SAndroid Build Coastguard Worker         U_ASSERT(a.d1() == d1());
246*0e209d39SAndroid Build Coastguard Worker         for (int32_t i = 0; i < d1(); i++) {
247*0e209d39SAndroid Build Coastguard Worker             data_[i] += a.get(i);
248*0e209d39SAndroid Build Coastguard Worker         }
249*0e209d39SAndroid Build Coastguard Worker         return *this;
250*0e209d39SAndroid Build Coastguard Worker     }
251*0e209d39SAndroid Build Coastguard Worker 
252*0e209d39SAndroid Build Coastguard Worker     // Assign the values of another array of the same size into this one.
assign(const ReadArray1D & a)253*0e209d39SAndroid Build Coastguard Worker     inline Array1D& assign(const ReadArray1D& a) {
254*0e209d39SAndroid Build Coastguard Worker         U_ASSERT(a.d1() == d1());
255*0e209d39SAndroid Build Coastguard Worker         for (int32_t i = 0; i < d1(); i++) {
256*0e209d39SAndroid Build Coastguard Worker             data_[i] = a.get(i);
257*0e209d39SAndroid Build Coastguard Worker         }
258*0e209d39SAndroid Build Coastguard Worker         return *this;
259*0e209d39SAndroid Build Coastguard Worker     }
260*0e209d39SAndroid Build Coastguard Worker 
261*0e209d39SAndroid Build Coastguard Worker     // Apply tanh to all the elements in the array.
tanh()262*0e209d39SAndroid Build Coastguard Worker     inline Array1D& tanh() {
263*0e209d39SAndroid Build Coastguard Worker         return tanh(*this);
264*0e209d39SAndroid Build Coastguard Worker     }
265*0e209d39SAndroid Build Coastguard Worker 
266*0e209d39SAndroid Build Coastguard Worker     // Apply tanh of a and store into this array.
tanh(const Array1D & a)267*0e209d39SAndroid Build Coastguard Worker     inline Array1D& tanh(const Array1D& a) {
268*0e209d39SAndroid Build Coastguard Worker         U_ASSERT(a.d1() == d1());
269*0e209d39SAndroid Build Coastguard Worker         for (int32_t i = 0; i < d1_; i++) {
270*0e209d39SAndroid Build Coastguard Worker             data_[i] = std::tanh(a.get(i));
271*0e209d39SAndroid Build Coastguard Worker         }
272*0e209d39SAndroid Build Coastguard Worker         return *this;
273*0e209d39SAndroid Build Coastguard Worker     }
274*0e209d39SAndroid Build Coastguard Worker 
275*0e209d39SAndroid Build Coastguard Worker     // Apply sigmoid to all the elements in the array.
sigmoid()276*0e209d39SAndroid Build Coastguard Worker     inline Array1D& sigmoid() {
277*0e209d39SAndroid Build Coastguard Worker         for (int32_t i = 0; i < d1_; i++) {
278*0e209d39SAndroid Build Coastguard Worker             data_[i] = 1.0f/(1.0f + expf(-data_[i]));
279*0e209d39SAndroid Build Coastguard Worker         }
280*0e209d39SAndroid Build Coastguard Worker         return *this;
281*0e209d39SAndroid Build Coastguard Worker     }
282*0e209d39SAndroid Build Coastguard Worker 
clear()283*0e209d39SAndroid Build Coastguard Worker     inline Array1D& clear() {
284*0e209d39SAndroid Build Coastguard Worker         uprv_memset(data_, 0, d1_ * sizeof(float));
285*0e209d39SAndroid Build Coastguard Worker         return *this;
286*0e209d39SAndroid Build Coastguard Worker     }
287*0e209d39SAndroid Build Coastguard Worker 
288*0e209d39SAndroid Build Coastguard Worker private:
289*0e209d39SAndroid Build Coastguard Worker     void* memory_;
290*0e209d39SAndroid Build Coastguard Worker     float* data_;
291*0e209d39SAndroid Build Coastguard Worker     int32_t d1_;
292*0e209d39SAndroid Build Coastguard Worker };
293*0e209d39SAndroid Build Coastguard Worker 
~Array1D()294*0e209d39SAndroid Build Coastguard Worker Array1D::~Array1D()
295*0e209d39SAndroid Build Coastguard Worker {
296*0e209d39SAndroid Build Coastguard Worker     uprv_free(memory_);
297*0e209d39SAndroid Build Coastguard Worker }
298*0e209d39SAndroid Build Coastguard Worker 
299*0e209d39SAndroid Build Coastguard Worker class Array2D : public ReadArray2D {
300*0e209d39SAndroid Build Coastguard Worker public:
Array2D()301*0e209d39SAndroid Build Coastguard Worker     Array2D() : memory_(nullptr), data_(nullptr), d1_(0), d2_(0) {}
Array2D(int32_t d1,int32_t d2,UErrorCode & status)302*0e209d39SAndroid Build Coastguard Worker     Array2D(int32_t d1, int32_t d2, UErrorCode &status)
303*0e209d39SAndroid Build Coastguard Worker         : memory_(uprv_malloc(d1 * d2 * sizeof(float))),
304*0e209d39SAndroid Build Coastguard Worker           data_((float*)memory_), d1_(d1), d2_(d2) {
305*0e209d39SAndroid Build Coastguard Worker         if (U_SUCCESS(status)) {
306*0e209d39SAndroid Build Coastguard Worker             if (memory_ == nullptr) {
307*0e209d39SAndroid Build Coastguard Worker                 status = U_MEMORY_ALLOCATION_ERROR;
308*0e209d39SAndroid Build Coastguard Worker                 return;
309*0e209d39SAndroid Build Coastguard Worker             }
310*0e209d39SAndroid Build Coastguard Worker             clear();
311*0e209d39SAndroid Build Coastguard Worker         }
312*0e209d39SAndroid Build Coastguard Worker     }
313*0e209d39SAndroid Build Coastguard Worker     virtual ~Array2D();
314*0e209d39SAndroid Build Coastguard Worker 
315*0e209d39SAndroid Build Coastguard Worker     // ReadArray2D methods.
d1() const316*0e209d39SAndroid Build Coastguard Worker     virtual int32_t d1() const override { return d1_; }
d2() const317*0e209d39SAndroid Build Coastguard Worker     virtual int32_t d2() const override { return d2_; }
get(int32_t i,int32_t j) const318*0e209d39SAndroid Build Coastguard Worker     virtual float get(int32_t i, int32_t j) const override {
319*0e209d39SAndroid Build Coastguard Worker         U_ASSERT(i < d1_);
320*0e209d39SAndroid Build Coastguard Worker         U_ASSERT(j < d2_);
321*0e209d39SAndroid Build Coastguard Worker         return data_[i * d2_ + j];
322*0e209d39SAndroid Build Coastguard Worker     }
323*0e209d39SAndroid Build Coastguard Worker 
row(int32_t i) const324*0e209d39SAndroid Build Coastguard Worker     inline Array1D row(int32_t i) const {
325*0e209d39SAndroid Build Coastguard Worker         U_ASSERT(i < d1_);
326*0e209d39SAndroid Build Coastguard Worker         return Array1D(data_ + i * d2_, d2_);
327*0e209d39SAndroid Build Coastguard Worker     }
328*0e209d39SAndroid Build Coastguard Worker 
clear()329*0e209d39SAndroid Build Coastguard Worker     inline Array2D& clear() {
330*0e209d39SAndroid Build Coastguard Worker         uprv_memset(data_, 0, d1_ * d2_ * sizeof(float));
331*0e209d39SAndroid Build Coastguard Worker         return *this;
332*0e209d39SAndroid Build Coastguard Worker     }
333*0e209d39SAndroid Build Coastguard Worker 
334*0e209d39SAndroid Build Coastguard Worker private:
335*0e209d39SAndroid Build Coastguard Worker     void* memory_;
336*0e209d39SAndroid Build Coastguard Worker     float* data_;
337*0e209d39SAndroid Build Coastguard Worker     int32_t d1_;
338*0e209d39SAndroid Build Coastguard Worker     int32_t d2_;
339*0e209d39SAndroid Build Coastguard Worker };
340*0e209d39SAndroid Build Coastguard Worker 
~Array2D()341*0e209d39SAndroid Build Coastguard Worker Array2D::~Array2D()
342*0e209d39SAndroid Build Coastguard Worker {
343*0e209d39SAndroid Build Coastguard Worker     uprv_free(memory_);
344*0e209d39SAndroid Build Coastguard Worker }
345*0e209d39SAndroid Build Coastguard Worker 
// Labels used by the LSTM code. The enumerators are numbered consecutively
// from zero in declaration order.
// NOTE(review): presumably the position of an element within a word
// (begin / inside / end / single) -- confirm against the code that uses it.
enum LSTMClass {
    BEGIN = 0,
    INSIDE = 1,
    END = 2,
    SINGLE = 3
};
352*0e209d39SAndroid Build Coastguard Worker 
// Kind of embedding a model uses. LSTMData starts out as UNKNOWN and switches
// to one of the other values according to the "type" string of its resource
// bundle ("codepoints" or "graphclust").
enum EmbeddingType {
    UNKNOWN = 0,
    CODE_POINTS = 1,
    GRAPHEME_CLUSTER = 2
};
358*0e209d39SAndroid Build Coastguard Worker 
/**
 * Data of one LSTM model, filled in from a resource bundle by the constructor.
 *
 * The matrices are ConstArray views laid one after another over the int vector
 * stored under the bundle's "data" key; nothing is copied.
 */
struct LSTMData : public UMemory {
    // Reads the model out of rb. On failure status is set and the object may be
    // only partially initialized.
    LSTMData(UResourceBundle* rb, UErrorCode &status);
    ~LSTMData();
    // Maps each string of the bundle's "dict" array to its index in that array.
    UHashtable* fDict;
    // CODE_POINTS or GRAPHEME_CLUSTER according to the bundle's "type" string;
    // stays UNKNOWN for any other value.
    EmbeddingType fType;
    // The bundle's "model" string.
    const char16_t* fName;
    // (number of dict entries + 1) x embedding size.
    ConstArray2D fEmbedding;
    // embedding size x (4 * hunits).
    ConstArray2D fForwardW;
    // hunits x (4 * hunits).
    ConstArray2D fForwardU;
    // 4 * hunits values.
    ConstArray1D fForwardB;
    // embedding size x (4 * hunits).
    ConstArray2D fBackwardW;
    // NOTE(review): the initialization of the four members below is outside the
    // visible part of the constructor; presumably they mirror the forward
    // matrices and are followed by the output layer -- confirm there.
    ConstArray2D fBackwardU;
    ConstArray1D fBackwardB;
    ConstArray2D fOutputW;
    ConstArray1D fOutputB;

private:
    // The bundle passed to the constructor.
    // NOTE(review): whether it is closed by the destructor is not visible here.
    UResourceBundle* fBundle;
};
378*0e209d39SAndroid Build Coastguard Worker 
LSTMData(UResourceBundle * rb,UErrorCode & status)379*0e209d39SAndroid Build Coastguard Worker LSTMData::LSTMData(UResourceBundle* rb, UErrorCode &status)
380*0e209d39SAndroid Build Coastguard Worker     : fDict(nullptr), fType(UNKNOWN), fName(nullptr),
381*0e209d39SAndroid Build Coastguard Worker       fBundle(rb)
382*0e209d39SAndroid Build Coastguard Worker {
383*0e209d39SAndroid Build Coastguard Worker     if (U_FAILURE(status)) {
384*0e209d39SAndroid Build Coastguard Worker         return;
385*0e209d39SAndroid Build Coastguard Worker     }
386*0e209d39SAndroid Build Coastguard Worker     if (IEEE_754 != 1) {
387*0e209d39SAndroid Build Coastguard Worker         status = U_UNSUPPORTED_ERROR;
388*0e209d39SAndroid Build Coastguard Worker         return;
389*0e209d39SAndroid Build Coastguard Worker     }
390*0e209d39SAndroid Build Coastguard Worker     LocalUResourceBundlePointer embeddings_res(
391*0e209d39SAndroid Build Coastguard Worker         ures_getByKey(rb, "embeddings", nullptr, &status));
392*0e209d39SAndroid Build Coastguard Worker     int32_t embedding_size = ures_getInt(embeddings_res.getAlias(), &status);
393*0e209d39SAndroid Build Coastguard Worker     LocalUResourceBundlePointer hunits_res(
394*0e209d39SAndroid Build Coastguard Worker         ures_getByKey(rb, "hunits", nullptr, &status));
395*0e209d39SAndroid Build Coastguard Worker     if (U_FAILURE(status)) return;
396*0e209d39SAndroid Build Coastguard Worker     int32_t hunits = ures_getInt(hunits_res.getAlias(), &status);
397*0e209d39SAndroid Build Coastguard Worker     const char16_t* type = ures_getStringByKey(rb, "type", nullptr, &status);
398*0e209d39SAndroid Build Coastguard Worker     if (U_FAILURE(status)) return;
399*0e209d39SAndroid Build Coastguard Worker     if (u_strCompare(type, -1, u"codepoints", -1, false) == 0) {
400*0e209d39SAndroid Build Coastguard Worker         fType = CODE_POINTS;
401*0e209d39SAndroid Build Coastguard Worker     } else if (u_strCompare(type, -1, u"graphclust", -1, false) == 0) {
402*0e209d39SAndroid Build Coastguard Worker         fType = GRAPHEME_CLUSTER;
403*0e209d39SAndroid Build Coastguard Worker     }
404*0e209d39SAndroid Build Coastguard Worker     fName = ures_getStringByKey(rb, "model", nullptr, &status);
405*0e209d39SAndroid Build Coastguard Worker     LocalUResourceBundlePointer dataRes(ures_getByKey(rb, "data", nullptr, &status));
406*0e209d39SAndroid Build Coastguard Worker     if (U_FAILURE(status)) return;
407*0e209d39SAndroid Build Coastguard Worker     int32_t data_len = 0;
408*0e209d39SAndroid Build Coastguard Worker     const int32_t* data = ures_getIntVector(dataRes.getAlias(), &data_len, &status);
409*0e209d39SAndroid Build Coastguard Worker     fDict = uhash_open(uhash_hashUChars, uhash_compareUChars, nullptr, &status);
410*0e209d39SAndroid Build Coastguard Worker 
411*0e209d39SAndroid Build Coastguard Worker     StackUResourceBundle stackTempBundle;
412*0e209d39SAndroid Build Coastguard Worker     ResourceDataValue value;
413*0e209d39SAndroid Build Coastguard Worker     ures_getValueWithFallback(rb, "dict", stackTempBundle.getAlias(), value, status);
414*0e209d39SAndroid Build Coastguard Worker     ResourceArray stringArray = value.getArray(status);
415*0e209d39SAndroid Build Coastguard Worker     int32_t num_index = stringArray.getSize();
416*0e209d39SAndroid Build Coastguard Worker     if (U_FAILURE(status)) { return; }
417*0e209d39SAndroid Build Coastguard Worker 
418*0e209d39SAndroid Build Coastguard Worker     // put dict into hash
419*0e209d39SAndroid Build Coastguard Worker     int32_t stringLength;
420*0e209d39SAndroid Build Coastguard Worker     for (int32_t idx = 0; idx < num_index; idx++) {
421*0e209d39SAndroid Build Coastguard Worker         stringArray.getValue(idx, value);
422*0e209d39SAndroid Build Coastguard Worker         const char16_t* str = value.getString(stringLength, status);
423*0e209d39SAndroid Build Coastguard Worker         uhash_putiAllowZero(fDict, (void*)str, idx, &status);
424*0e209d39SAndroid Build Coastguard Worker         if (U_FAILURE(status)) return;
425*0e209d39SAndroid Build Coastguard Worker #ifdef LSTM_VECTORIZER_DEBUG
426*0e209d39SAndroid Build Coastguard Worker         printf("Assign [");
427*0e209d39SAndroid Build Coastguard Worker         while (*str != 0x0000) {
428*0e209d39SAndroid Build Coastguard Worker             printf("U+%04x ", *str);
429*0e209d39SAndroid Build Coastguard Worker             str++;
430*0e209d39SAndroid Build Coastguard Worker         }
431*0e209d39SAndroid Build Coastguard Worker         printf("] map to %d\n", idx-1);
432*0e209d39SAndroid Build Coastguard Worker #endif
433*0e209d39SAndroid Build Coastguard Worker     }
434*0e209d39SAndroid Build Coastguard Worker     int32_t mat1_size = (num_index + 1) * embedding_size;
435*0e209d39SAndroid Build Coastguard Worker     int32_t mat2_size = embedding_size * 4 * hunits;
436*0e209d39SAndroid Build Coastguard Worker     int32_t mat3_size = hunits * 4 * hunits;
437*0e209d39SAndroid Build Coastguard Worker     int32_t mat4_size = 4 * hunits;
438*0e209d39SAndroid Build Coastguard Worker     int32_t mat5_size = mat2_size;
439*0e209d39SAndroid Build Coastguard Worker     int32_t mat6_size = mat3_size;
440*0e209d39SAndroid Build Coastguard Worker     int32_t mat7_size = mat4_size;
441*0e209d39SAndroid Build Coastguard Worker     int32_t mat8_size = 2 * hunits * 4;
442*0e209d39SAndroid Build Coastguard Worker #if U_DEBUG
443*0e209d39SAndroid Build Coastguard Worker     int32_t mat9_size = 4;
444*0e209d39SAndroid Build Coastguard Worker     U_ASSERT(data_len == mat1_size + mat2_size + mat3_size + mat4_size + mat5_size +
445*0e209d39SAndroid Build Coastguard Worker         mat6_size + mat7_size + mat8_size + mat9_size);
446*0e209d39SAndroid Build Coastguard Worker #endif
447*0e209d39SAndroid Build Coastguard Worker 
448*0e209d39SAndroid Build Coastguard Worker     fEmbedding.init(data, (num_index + 1), embedding_size);
449*0e209d39SAndroid Build Coastguard Worker     data += mat1_size;
450*0e209d39SAndroid Build Coastguard Worker     fForwardW.init(data, embedding_size, 4 * hunits);
451*0e209d39SAndroid Build Coastguard Worker     data += mat2_size;
452*0e209d39SAndroid Build Coastguard Worker     fForwardU.init(data, hunits, 4 * hunits);
453*0e209d39SAndroid Build Coastguard Worker     data += mat3_size;
454*0e209d39SAndroid Build Coastguard Worker     fForwardB.init(data, 4 * hunits);
455*0e209d39SAndroid Build Coastguard Worker     data += mat4_size;
456*0e209d39SAndroid Build Coastguard Worker     fBackwardW.init(data, embedding_size, 4 * hunits);
457*0e209d39SAndroid Build Coastguard Worker     data += mat5_size;
458*0e209d39SAndroid Build Coastguard Worker     fBackwardU.init(data, hunits, 4 * hunits);
459*0e209d39SAndroid Build Coastguard Worker     data += mat6_size;
460*0e209d39SAndroid Build Coastguard Worker     fBackwardB.init(data, 4 * hunits);
461*0e209d39SAndroid Build Coastguard Worker     data += mat7_size;
462*0e209d39SAndroid Build Coastguard Worker     fOutputW.init(data, 2 * hunits, 4);
463*0e209d39SAndroid Build Coastguard Worker     data += mat8_size;
464*0e209d39SAndroid Build Coastguard Worker     fOutputB.init(data, 4);
465*0e209d39SAndroid Build Coastguard Worker }
466*0e209d39SAndroid Build Coastguard Worker 
~LSTMData()467*0e209d39SAndroid Build Coastguard Worker LSTMData::~LSTMData() {
468*0e209d39SAndroid Build Coastguard Worker     uhash_close(fDict);
469*0e209d39SAndroid Build Coastguard Worker     ures_close(fBundle);
470*0e209d39SAndroid Build Coastguard Worker }
471*0e209d39SAndroid Build Coastguard Worker 
472*0e209d39SAndroid Build Coastguard Worker class Vectorizer : public UMemory {
473*0e209d39SAndroid Build Coastguard Worker public:
Vectorizer(UHashtable * dict)474*0e209d39SAndroid Build Coastguard Worker     Vectorizer(UHashtable* dict) : fDict(dict) {}
475*0e209d39SAndroid Build Coastguard Worker     virtual ~Vectorizer();
476*0e209d39SAndroid Build Coastguard Worker     virtual void vectorize(UText *text, int32_t startPos, int32_t endPos,
477*0e209d39SAndroid Build Coastguard Worker                            UVector32 &offsets, UVector32 &indices,
478*0e209d39SAndroid Build Coastguard Worker                            UErrorCode &status) const = 0;
479*0e209d39SAndroid Build Coastguard Worker protected:
stringToIndex(const char16_t * str) const480*0e209d39SAndroid Build Coastguard Worker     int32_t stringToIndex(const char16_t* str) const {
481*0e209d39SAndroid Build Coastguard Worker         UBool found = false;
482*0e209d39SAndroid Build Coastguard Worker         int32_t ret = uhash_getiAndFound(fDict, (const void*)str, &found);
483*0e209d39SAndroid Build Coastguard Worker         if (!found) {
484*0e209d39SAndroid Build Coastguard Worker             ret = fDict->count;
485*0e209d39SAndroid Build Coastguard Worker         }
486*0e209d39SAndroid Build Coastguard Worker #ifdef LSTM_VECTORIZER_DEBUG
487*0e209d39SAndroid Build Coastguard Worker         printf("[");
488*0e209d39SAndroid Build Coastguard Worker         while (*str != 0x0000) {
489*0e209d39SAndroid Build Coastguard Worker             printf("U+%04x ", *str);
490*0e209d39SAndroid Build Coastguard Worker             str++;
491*0e209d39SAndroid Build Coastguard Worker         }
492*0e209d39SAndroid Build Coastguard Worker         printf("] map to %d\n", ret);
493*0e209d39SAndroid Build Coastguard Worker #endif
494*0e209d39SAndroid Build Coastguard Worker         return ret;
495*0e209d39SAndroid Build Coastguard Worker     }
496*0e209d39SAndroid Build Coastguard Worker 
497*0e209d39SAndroid Build Coastguard Worker private:
498*0e209d39SAndroid Build Coastguard Worker     UHashtable* fDict;
499*0e209d39SAndroid Build Coastguard Worker };
500*0e209d39SAndroid Build Coastguard Worker 
~Vectorizer()501*0e209d39SAndroid Build Coastguard Worker Vectorizer::~Vectorizer()
502*0e209d39SAndroid Build Coastguard Worker {
503*0e209d39SAndroid Build Coastguard Worker }
504*0e209d39SAndroid Build Coastguard Worker 
505*0e209d39SAndroid Build Coastguard Worker class CodePointsVectorizer : public Vectorizer {
506*0e209d39SAndroid Build Coastguard Worker public:
CodePointsVectorizer(UHashtable * dict)507*0e209d39SAndroid Build Coastguard Worker     CodePointsVectorizer(UHashtable* dict) : Vectorizer(dict) {}
508*0e209d39SAndroid Build Coastguard Worker     virtual ~CodePointsVectorizer();
509*0e209d39SAndroid Build Coastguard Worker     virtual void vectorize(UText *text, int32_t startPos, int32_t endPos,
510*0e209d39SAndroid Build Coastguard Worker                            UVector32 &offsets, UVector32 &indices,
511*0e209d39SAndroid Build Coastguard Worker                            UErrorCode &status) const override;
512*0e209d39SAndroid Build Coastguard Worker };
513*0e209d39SAndroid Build Coastguard Worker 
~CodePointsVectorizer()514*0e209d39SAndroid Build Coastguard Worker CodePointsVectorizer::~CodePointsVectorizer()
515*0e209d39SAndroid Build Coastguard Worker {
516*0e209d39SAndroid Build Coastguard Worker }
517*0e209d39SAndroid Build Coastguard Worker 
vectorize(UText * text,int32_t startPos,int32_t endPos,UVector32 & offsets,UVector32 & indices,UErrorCode & status) const518*0e209d39SAndroid Build Coastguard Worker void CodePointsVectorizer::vectorize(
519*0e209d39SAndroid Build Coastguard Worker     UText *text, int32_t startPos, int32_t endPos,
520*0e209d39SAndroid Build Coastguard Worker     UVector32 &offsets, UVector32 &indices, UErrorCode &status) const
521*0e209d39SAndroid Build Coastguard Worker {
522*0e209d39SAndroid Build Coastguard Worker     if (offsets.ensureCapacity(endPos - startPos, status) &&
523*0e209d39SAndroid Build Coastguard Worker             indices.ensureCapacity(endPos - startPos, status)) {
524*0e209d39SAndroid Build Coastguard Worker         if (U_FAILURE(status)) return;
525*0e209d39SAndroid Build Coastguard Worker         utext_setNativeIndex(text, startPos);
526*0e209d39SAndroid Build Coastguard Worker         int32_t current;
527*0e209d39SAndroid Build Coastguard Worker         char16_t str[2] = {0, 0};
528*0e209d39SAndroid Build Coastguard Worker         while (U_SUCCESS(status) &&
529*0e209d39SAndroid Build Coastguard Worker                (current = (int32_t)utext_getNativeIndex(text)) < endPos) {
530*0e209d39SAndroid Build Coastguard Worker             // Since the LSTMBreakEngine is currently only accept chars in BMP,
531*0e209d39SAndroid Build Coastguard Worker             // we can ignore the possibility of hitting supplementary code
532*0e209d39SAndroid Build Coastguard Worker             // point.
533*0e209d39SAndroid Build Coastguard Worker             str[0] = (char16_t) utext_next32(text);
534*0e209d39SAndroid Build Coastguard Worker             U_ASSERT(!U_IS_SURROGATE(str[0]));
535*0e209d39SAndroid Build Coastguard Worker             offsets.addElement(current, status);
536*0e209d39SAndroid Build Coastguard Worker             indices.addElement(stringToIndex(str), status);
537*0e209d39SAndroid Build Coastguard Worker         }
538*0e209d39SAndroid Build Coastguard Worker     }
539*0e209d39SAndroid Build Coastguard Worker }
540*0e209d39SAndroid Build Coastguard Worker 
541*0e209d39SAndroid Build Coastguard Worker class GraphemeClusterVectorizer : public Vectorizer {
542*0e209d39SAndroid Build Coastguard Worker public:
GraphemeClusterVectorizer(UHashtable * dict)543*0e209d39SAndroid Build Coastguard Worker     GraphemeClusterVectorizer(UHashtable* dict)
544*0e209d39SAndroid Build Coastguard Worker         : Vectorizer(dict)
545*0e209d39SAndroid Build Coastguard Worker     {
546*0e209d39SAndroid Build Coastguard Worker     }
547*0e209d39SAndroid Build Coastguard Worker     virtual ~GraphemeClusterVectorizer();
548*0e209d39SAndroid Build Coastguard Worker     virtual void vectorize(UText *text, int32_t startPos, int32_t endPos,
549*0e209d39SAndroid Build Coastguard Worker                            UVector32 &offsets, UVector32 &indices,
550*0e209d39SAndroid Build Coastguard Worker                            UErrorCode &status) const override;
551*0e209d39SAndroid Build Coastguard Worker };
552*0e209d39SAndroid Build Coastguard Worker 
~GraphemeClusterVectorizer()553*0e209d39SAndroid Build Coastguard Worker GraphemeClusterVectorizer::~GraphemeClusterVectorizer()
554*0e209d39SAndroid Build Coastguard Worker {
555*0e209d39SAndroid Build Coastguard Worker }
556*0e209d39SAndroid Build Coastguard Worker 
557*0e209d39SAndroid Build Coastguard Worker constexpr int32_t MAX_GRAPHEME_CLSTER_LENGTH = 10;
558*0e209d39SAndroid Build Coastguard Worker 
vectorize(UText * text,int32_t startPos,int32_t endPos,UVector32 & offsets,UVector32 & indices,UErrorCode & status) const559*0e209d39SAndroid Build Coastguard Worker void GraphemeClusterVectorizer::vectorize(
560*0e209d39SAndroid Build Coastguard Worker     UText *text, int32_t startPos, int32_t endPos,
561*0e209d39SAndroid Build Coastguard Worker     UVector32 &offsets, UVector32 &indices, UErrorCode &status) const
562*0e209d39SAndroid Build Coastguard Worker {
563*0e209d39SAndroid Build Coastguard Worker     if (U_FAILURE(status)) return;
564*0e209d39SAndroid Build Coastguard Worker     if (!offsets.ensureCapacity(endPos - startPos, status) ||
565*0e209d39SAndroid Build Coastguard Worker             !indices.ensureCapacity(endPos - startPos, status)) {
566*0e209d39SAndroid Build Coastguard Worker         return;
567*0e209d39SAndroid Build Coastguard Worker     }
568*0e209d39SAndroid Build Coastguard Worker     if (U_FAILURE(status)) return;
569*0e209d39SAndroid Build Coastguard Worker     LocalPointer<BreakIterator> graphemeIter(BreakIterator::createCharacterInstance(Locale(), status));
570*0e209d39SAndroid Build Coastguard Worker     if (U_FAILURE(status)) return;
571*0e209d39SAndroid Build Coastguard Worker     graphemeIter->setText(text, status);
572*0e209d39SAndroid Build Coastguard Worker     if (U_FAILURE(status)) return;
573*0e209d39SAndroid Build Coastguard Worker 
574*0e209d39SAndroid Build Coastguard Worker     if (startPos != 0) {
575*0e209d39SAndroid Build Coastguard Worker         graphemeIter->preceding(startPos);
576*0e209d39SAndroid Build Coastguard Worker     }
577*0e209d39SAndroid Build Coastguard Worker     int32_t last = startPos;
578*0e209d39SAndroid Build Coastguard Worker     int32_t current = startPos;
579*0e209d39SAndroid Build Coastguard Worker     char16_t str[MAX_GRAPHEME_CLSTER_LENGTH];
580*0e209d39SAndroid Build Coastguard Worker     while ((current = graphemeIter->next()) != BreakIterator::DONE) {
581*0e209d39SAndroid Build Coastguard Worker         if (current >= endPos) {
582*0e209d39SAndroid Build Coastguard Worker             break;
583*0e209d39SAndroid Build Coastguard Worker         }
584*0e209d39SAndroid Build Coastguard Worker         if (current > startPos) {
585*0e209d39SAndroid Build Coastguard Worker             utext_extract(text, last, current, str, MAX_GRAPHEME_CLSTER_LENGTH, &status);
586*0e209d39SAndroid Build Coastguard Worker             if (U_FAILURE(status)) return;
587*0e209d39SAndroid Build Coastguard Worker             offsets.addElement(last, status);
588*0e209d39SAndroid Build Coastguard Worker             indices.addElement(stringToIndex(str), status);
589*0e209d39SAndroid Build Coastguard Worker             if (U_FAILURE(status)) return;
590*0e209d39SAndroid Build Coastguard Worker         }
591*0e209d39SAndroid Build Coastguard Worker         last = current;
592*0e209d39SAndroid Build Coastguard Worker     }
593*0e209d39SAndroid Build Coastguard Worker     if (U_FAILURE(status) || last >= endPos) {
594*0e209d39SAndroid Build Coastguard Worker         return;
595*0e209d39SAndroid Build Coastguard Worker     }
596*0e209d39SAndroid Build Coastguard Worker     utext_extract(text, last, endPos, str, MAX_GRAPHEME_CLSTER_LENGTH, &status);
597*0e209d39SAndroid Build Coastguard Worker     if (U_SUCCESS(status)) {
598*0e209d39SAndroid Build Coastguard Worker         offsets.addElement(last, status);
599*0e209d39SAndroid Build Coastguard Worker         indices.addElement(stringToIndex(str), status);
600*0e209d39SAndroid Build Coastguard Worker     }
601*0e209d39SAndroid Build Coastguard Worker }
602*0e209d39SAndroid Build Coastguard Worker 
603*0e209d39SAndroid Build Coastguard Worker // Computing LSTM as stated in
604*0e209d39SAndroid Build Coastguard Worker // https://en.wikipedia.org/wiki/Long_short-term_memory#LSTM_with_a_forget_gate
605*0e209d39SAndroid Build Coastguard Worker // ifco is temp array allocate outside which does not need to be
606*0e209d39SAndroid Build Coastguard Worker // input/output value but could avoid unnecessary memory alloc/free if passing
607*0e209d39SAndroid Build Coastguard Worker // in.
compute(int32_t hunits,const ReadArray2D & W,const ReadArray2D & U,const ReadArray1D & b,const ReadArray1D & x,Array1D & h,Array1D & c,Array1D & ifco)608*0e209d39SAndroid Build Coastguard Worker void compute(
609*0e209d39SAndroid Build Coastguard Worker     int32_t hunits,
610*0e209d39SAndroid Build Coastguard Worker     const ReadArray2D& W, const ReadArray2D& U, const ReadArray1D& b,
611*0e209d39SAndroid Build Coastguard Worker     const ReadArray1D& x, Array1D& h, Array1D& c,
612*0e209d39SAndroid Build Coastguard Worker     Array1D& ifco)
613*0e209d39SAndroid Build Coastguard Worker {
614*0e209d39SAndroid Build Coastguard Worker     // ifco = x * W + h * U + b
615*0e209d39SAndroid Build Coastguard Worker     ifco.assign(b)
616*0e209d39SAndroid Build Coastguard Worker         .addDotProduct(x, W)
617*0e209d39SAndroid Build Coastguard Worker         .addDotProduct(h, U);
618*0e209d39SAndroid Build Coastguard Worker 
619*0e209d39SAndroid Build Coastguard Worker     ifco.slice(0*hunits, hunits).sigmoid();  // i: sigmod
620*0e209d39SAndroid Build Coastguard Worker     ifco.slice(1*hunits, hunits).sigmoid(); // f: sigmoid
621*0e209d39SAndroid Build Coastguard Worker     ifco.slice(2*hunits, hunits).tanh(); // c_: tanh
622*0e209d39SAndroid Build Coastguard Worker     ifco.slice(3*hunits, hunits).sigmoid(); // o: sigmod
623*0e209d39SAndroid Build Coastguard Worker 
624*0e209d39SAndroid Build Coastguard Worker     c.hadamardProduct(ifco.slice(hunits, hunits))
625*0e209d39SAndroid Build Coastguard Worker         .addHadamardProduct(ifco.slice(0, hunits), ifco.slice(2*hunits, hunits));
626*0e209d39SAndroid Build Coastguard Worker 
627*0e209d39SAndroid Build Coastguard Worker     h.tanh(c)
628*0e209d39SAndroid Build Coastguard Worker         .hadamardProduct(ifco.slice(3*hunits, hunits));
629*0e209d39SAndroid Build Coastguard Worker }
630*0e209d39SAndroid Build Coastguard Worker 
// Smallest size of a word.
static constexpr int32_t MIN_WORD = 2;

// Smallest number of characters that can hold two words.
static constexpr int32_t MIN_WORD_SPAN = 2 * MIN_WORD;
636*0e209d39SAndroid Build Coastguard Worker 
637*0e209d39SAndroid Build Coastguard Worker int32_t
divideUpDictionaryRange(UText * text,int32_t startPos,int32_t endPos,UVector32 & foundBreaks,UBool,UErrorCode & status) const638*0e209d39SAndroid Build Coastguard Worker LSTMBreakEngine::divideUpDictionaryRange( UText *text,
639*0e209d39SAndroid Build Coastguard Worker                                                 int32_t startPos,
640*0e209d39SAndroid Build Coastguard Worker                                                 int32_t endPos,
641*0e209d39SAndroid Build Coastguard Worker                                                 UVector32 &foundBreaks,
642*0e209d39SAndroid Build Coastguard Worker                                                 UBool /* isPhraseBreaking */,
643*0e209d39SAndroid Build Coastguard Worker                                                 UErrorCode& status) const {
644*0e209d39SAndroid Build Coastguard Worker     if (U_FAILURE(status)) return 0;
645*0e209d39SAndroid Build Coastguard Worker     int32_t beginFoundBreakSize = foundBreaks.size();
646*0e209d39SAndroid Build Coastguard Worker     utext_setNativeIndex(text, startPos);
647*0e209d39SAndroid Build Coastguard Worker     utext_moveIndex32(text, MIN_WORD_SPAN);
648*0e209d39SAndroid Build Coastguard Worker     if (utext_getNativeIndex(text) >= endPos) {
649*0e209d39SAndroid Build Coastguard Worker         return 0;       // Not enough characters for two words
650*0e209d39SAndroid Build Coastguard Worker     }
651*0e209d39SAndroid Build Coastguard Worker     utext_setNativeIndex(text, startPos);
652*0e209d39SAndroid Build Coastguard Worker 
653*0e209d39SAndroid Build Coastguard Worker     UVector32 offsets(status);
654*0e209d39SAndroid Build Coastguard Worker     UVector32 indices(status);
655*0e209d39SAndroid Build Coastguard Worker     if (U_FAILURE(status)) return 0;
656*0e209d39SAndroid Build Coastguard Worker     fVectorizer->vectorize(text, startPos, endPos, offsets, indices, status);
657*0e209d39SAndroid Build Coastguard Worker     if (U_FAILURE(status)) return 0;
658*0e209d39SAndroid Build Coastguard Worker     int32_t* offsetsBuf = offsets.getBuffer();
659*0e209d39SAndroid Build Coastguard Worker     int32_t* indicesBuf = indices.getBuffer();
660*0e209d39SAndroid Build Coastguard Worker 
661*0e209d39SAndroid Build Coastguard Worker     int32_t input_seq_len = indices.size();
662*0e209d39SAndroid Build Coastguard Worker     int32_t hunits = fData->fForwardU.d1();
663*0e209d39SAndroid Build Coastguard Worker 
664*0e209d39SAndroid Build Coastguard Worker     // ----- Begin of all the Array memory allocation needed for this function
665*0e209d39SAndroid Build Coastguard Worker     // Allocate temp array used inside compute()
666*0e209d39SAndroid Build Coastguard Worker     Array1D ifco(4 * hunits, status);
667*0e209d39SAndroid Build Coastguard Worker 
668*0e209d39SAndroid Build Coastguard Worker     Array1D c(hunits, status);
669*0e209d39SAndroid Build Coastguard Worker     Array1D logp(4, status);
670*0e209d39SAndroid Build Coastguard Worker 
671*0e209d39SAndroid Build Coastguard Worker     // TODO: limit size of hBackward. If input_seq_len is too big, we could
672*0e209d39SAndroid Build Coastguard Worker     // run out of memory.
673*0e209d39SAndroid Build Coastguard Worker     // Backward LSTM
674*0e209d39SAndroid Build Coastguard Worker     Array2D hBackward(input_seq_len, hunits, status);
675*0e209d39SAndroid Build Coastguard Worker 
676*0e209d39SAndroid Build Coastguard Worker     // Allocate fbRow and slice the internal array in two.
677*0e209d39SAndroid Build Coastguard Worker     Array1D fbRow(2 * hunits, status);
678*0e209d39SAndroid Build Coastguard Worker 
679*0e209d39SAndroid Build Coastguard Worker     // ----- End of all the Array memory allocation needed for this function
680*0e209d39SAndroid Build Coastguard Worker     if (U_FAILURE(status)) return 0;
681*0e209d39SAndroid Build Coastguard Worker 
682*0e209d39SAndroid Build Coastguard Worker     // To save the needed memory usage, the following is different from the
683*0e209d39SAndroid Build Coastguard Worker     // Python or ICU4X implementation. We first perform the Backward LSTM
684*0e209d39SAndroid Build Coastguard Worker     // and then merge the iteration of the forward LSTM and the output layer
685*0e209d39SAndroid Build Coastguard Worker     // together because we only neetdto remember the h[t-1] for Forward LSTM.
686*0e209d39SAndroid Build Coastguard Worker     for (int32_t i = input_seq_len - 1; i >= 0; i--) {
687*0e209d39SAndroid Build Coastguard Worker         Array1D hRow = hBackward.row(i);
688*0e209d39SAndroid Build Coastguard Worker         if (i != input_seq_len - 1) {
689*0e209d39SAndroid Build Coastguard Worker             hRow.assign(hBackward.row(i+1));
690*0e209d39SAndroid Build Coastguard Worker         }
691*0e209d39SAndroid Build Coastguard Worker #ifdef LSTM_DEBUG
692*0e209d39SAndroid Build Coastguard Worker         printf("hRow %d\n", i);
693*0e209d39SAndroid Build Coastguard Worker         hRow.print();
694*0e209d39SAndroid Build Coastguard Worker         printf("indicesBuf[%d] = %d\n", i, indicesBuf[i]);
695*0e209d39SAndroid Build Coastguard Worker         printf("fData->fEmbedding.row(indicesBuf[%d]):\n", i);
696*0e209d39SAndroid Build Coastguard Worker         fData->fEmbedding.row(indicesBuf[i]).print();
697*0e209d39SAndroid Build Coastguard Worker #endif  // LSTM_DEBUG
698*0e209d39SAndroid Build Coastguard Worker         compute(hunits,
699*0e209d39SAndroid Build Coastguard Worker                 fData->fBackwardW, fData->fBackwardU, fData->fBackwardB,
700*0e209d39SAndroid Build Coastguard Worker                 fData->fEmbedding.row(indicesBuf[i]),
701*0e209d39SAndroid Build Coastguard Worker                 hRow, c, ifco);
702*0e209d39SAndroid Build Coastguard Worker     }
703*0e209d39SAndroid Build Coastguard Worker 
704*0e209d39SAndroid Build Coastguard Worker 
705*0e209d39SAndroid Build Coastguard Worker     Array1D forwardRow = fbRow.slice(0, hunits);  // point to first half of data in fbRow.
706*0e209d39SAndroid Build Coastguard Worker     Array1D backwardRow = fbRow.slice(hunits, hunits);  // point to second half of data n fbRow.
707*0e209d39SAndroid Build Coastguard Worker 
708*0e209d39SAndroid Build Coastguard Worker     // The following iteration merge the forward LSTM and the output layer
709*0e209d39SAndroid Build Coastguard Worker     // together.
710*0e209d39SAndroid Build Coastguard Worker     c.clear();  // reuse c since it is the same size.
711*0e209d39SAndroid Build Coastguard Worker     for (int32_t i = 0; i < input_seq_len; i++) {
712*0e209d39SAndroid Build Coastguard Worker #ifdef LSTM_DEBUG
713*0e209d39SAndroid Build Coastguard Worker         printf("forwardRow %d\n", i);
714*0e209d39SAndroid Build Coastguard Worker         forwardRow.print();
715*0e209d39SAndroid Build Coastguard Worker #endif  // LSTM_DEBUG
716*0e209d39SAndroid Build Coastguard Worker         // Forward LSTM
717*0e209d39SAndroid Build Coastguard Worker         // Calculate the result into forwardRow, which point to the data in the first half
718*0e209d39SAndroid Build Coastguard Worker         // of fbRow.
719*0e209d39SAndroid Build Coastguard Worker         compute(hunits,
720*0e209d39SAndroid Build Coastguard Worker                 fData->fForwardW, fData->fForwardU, fData->fForwardB,
721*0e209d39SAndroid Build Coastguard Worker                 fData->fEmbedding.row(indicesBuf[i]),
722*0e209d39SAndroid Build Coastguard Worker                 forwardRow, c, ifco);
723*0e209d39SAndroid Build Coastguard Worker 
724*0e209d39SAndroid Build Coastguard Worker         // assign the data from hBackward.row(i) to second half of fbRowa.
725*0e209d39SAndroid Build Coastguard Worker         backwardRow.assign(hBackward.row(i));
726*0e209d39SAndroid Build Coastguard Worker 
727*0e209d39SAndroid Build Coastguard Worker         logp.assign(fData->fOutputB).addDotProduct(fbRow, fData->fOutputW);
728*0e209d39SAndroid Build Coastguard Worker #ifdef LSTM_DEBUG
729*0e209d39SAndroid Build Coastguard Worker         printf("backwardRow %d\n", i);
730*0e209d39SAndroid Build Coastguard Worker         backwardRow.print();
731*0e209d39SAndroid Build Coastguard Worker         printf("logp %d\n", i);
732*0e209d39SAndroid Build Coastguard Worker         logp.print();
733*0e209d39SAndroid Build Coastguard Worker #endif  // LSTM_DEBUG
734*0e209d39SAndroid Build Coastguard Worker 
735*0e209d39SAndroid Build Coastguard Worker         // current = argmax(logp)
736*0e209d39SAndroid Build Coastguard Worker         LSTMClass current = (LSTMClass)logp.maxIndex();
737*0e209d39SAndroid Build Coastguard Worker         // BIES logic.
738*0e209d39SAndroid Build Coastguard Worker         if (current == BEGIN || current == SINGLE) {
739*0e209d39SAndroid Build Coastguard Worker             if (i != 0) {
740*0e209d39SAndroid Build Coastguard Worker                 foundBreaks.addElement(offsetsBuf[i], status);
741*0e209d39SAndroid Build Coastguard Worker                 if (U_FAILURE(status)) return 0;
742*0e209d39SAndroid Build Coastguard Worker             }
743*0e209d39SAndroid Build Coastguard Worker         }
744*0e209d39SAndroid Build Coastguard Worker     }
745*0e209d39SAndroid Build Coastguard Worker     return foundBreaks.size() - beginFoundBreakSize;
746*0e209d39SAndroid Build Coastguard Worker }
747*0e209d39SAndroid Build Coastguard Worker 
createVectorizer(const LSTMData * data,UErrorCode & status)748*0e209d39SAndroid Build Coastguard Worker Vectorizer* createVectorizer(const LSTMData* data, UErrorCode &status) {
749*0e209d39SAndroid Build Coastguard Worker     if (U_FAILURE(status)) {
750*0e209d39SAndroid Build Coastguard Worker         return nullptr;
751*0e209d39SAndroid Build Coastguard Worker     }
752*0e209d39SAndroid Build Coastguard Worker     switch (data->fType) {
753*0e209d39SAndroid Build Coastguard Worker         case CODE_POINTS:
754*0e209d39SAndroid Build Coastguard Worker             return new CodePointsVectorizer(data->fDict);
755*0e209d39SAndroid Build Coastguard Worker             break;
756*0e209d39SAndroid Build Coastguard Worker         case GRAPHEME_CLUSTER:
757*0e209d39SAndroid Build Coastguard Worker             return new GraphemeClusterVectorizer(data->fDict);
758*0e209d39SAndroid Build Coastguard Worker             break;
759*0e209d39SAndroid Build Coastguard Worker         default:
760*0e209d39SAndroid Build Coastguard Worker             break;
761*0e209d39SAndroid Build Coastguard Worker     }
762*0e209d39SAndroid Build Coastguard Worker     UPRV_UNREACHABLE_EXIT;
763*0e209d39SAndroid Build Coastguard Worker }
764*0e209d39SAndroid Build Coastguard Worker 
LSTMBreakEngine(const LSTMData * data,const UnicodeSet & set,UErrorCode & status)765*0e209d39SAndroid Build Coastguard Worker LSTMBreakEngine::LSTMBreakEngine(const LSTMData* data, const UnicodeSet& set, UErrorCode &status)
766*0e209d39SAndroid Build Coastguard Worker     : DictionaryBreakEngine(), fData(data), fVectorizer(createVectorizer(fData, status))
767*0e209d39SAndroid Build Coastguard Worker {
768*0e209d39SAndroid Build Coastguard Worker     if (U_FAILURE(status)) {
769*0e209d39SAndroid Build Coastguard Worker       fData = nullptr;  // If failure, we should not delete fData in destructor because the caller will do so.
770*0e209d39SAndroid Build Coastguard Worker       return;
771*0e209d39SAndroid Build Coastguard Worker     }
772*0e209d39SAndroid Build Coastguard Worker     setCharacters(set);
773*0e209d39SAndroid Build Coastguard Worker }
774*0e209d39SAndroid Build Coastguard Worker 
~LSTMBreakEngine()775*0e209d39SAndroid Build Coastguard Worker LSTMBreakEngine::~LSTMBreakEngine() {
776*0e209d39SAndroid Build Coastguard Worker     delete fData;
777*0e209d39SAndroid Build Coastguard Worker     delete fVectorizer;
778*0e209d39SAndroid Build Coastguard Worker }
779*0e209d39SAndroid Build Coastguard Worker 
name() const780*0e209d39SAndroid Build Coastguard Worker const char16_t* LSTMBreakEngine::name() const {
781*0e209d39SAndroid Build Coastguard Worker     return fData->fName;
782*0e209d39SAndroid Build Coastguard Worker }
783*0e209d39SAndroid Build Coastguard Worker 
defaultLSTM(UScriptCode script,UErrorCode & status)784*0e209d39SAndroid Build Coastguard Worker UnicodeString defaultLSTM(UScriptCode script, UErrorCode& status) {
785*0e209d39SAndroid Build Coastguard Worker     // open root from brkitr tree.
786*0e209d39SAndroid Build Coastguard Worker     UResourceBundle *b = ures_open(U_ICUDATA_BRKITR, "", &status);
787*0e209d39SAndroid Build Coastguard Worker     b = ures_getByKeyWithFallback(b, "lstm", b, &status);
788*0e209d39SAndroid Build Coastguard Worker     UnicodeString result = ures_getUnicodeStringByKey(b, uscript_getShortName(script), &status);
789*0e209d39SAndroid Build Coastguard Worker     ures_close(b);
790*0e209d39SAndroid Build Coastguard Worker     return result;
791*0e209d39SAndroid Build Coastguard Worker }
792*0e209d39SAndroid Build Coastguard Worker 
CreateLSTMDataForScript(UScriptCode script,UErrorCode & status)793*0e209d39SAndroid Build Coastguard Worker U_CAPI const LSTMData* U_EXPORT2 CreateLSTMDataForScript(UScriptCode script, UErrorCode& status)
794*0e209d39SAndroid Build Coastguard Worker {
795*0e209d39SAndroid Build Coastguard Worker     if (script != USCRIPT_KHMER && script != USCRIPT_LAO && script != USCRIPT_MYANMAR && script != USCRIPT_THAI) {
796*0e209d39SAndroid Build Coastguard Worker         return nullptr;
797*0e209d39SAndroid Build Coastguard Worker     }
798*0e209d39SAndroid Build Coastguard Worker     UnicodeString name = defaultLSTM(script, status);
799*0e209d39SAndroid Build Coastguard Worker     if (U_FAILURE(status)) return nullptr;
800*0e209d39SAndroid Build Coastguard Worker     CharString namebuf;
801*0e209d39SAndroid Build Coastguard Worker     namebuf.appendInvariantChars(name, status).truncate(namebuf.lastIndexOf('.'));
802*0e209d39SAndroid Build Coastguard Worker 
803*0e209d39SAndroid Build Coastguard Worker     LocalUResourceBundlePointer rb(
804*0e209d39SAndroid Build Coastguard Worker         ures_openDirect(U_ICUDATA_BRKITR, namebuf.data(), &status));
805*0e209d39SAndroid Build Coastguard Worker     if (U_FAILURE(status)) return nullptr;
806*0e209d39SAndroid Build Coastguard Worker 
807*0e209d39SAndroid Build Coastguard Worker     return CreateLSTMData(rb.orphan(), status);
808*0e209d39SAndroid Build Coastguard Worker }
809*0e209d39SAndroid Build Coastguard Worker 
CreateLSTMData(UResourceBundle * rb,UErrorCode & status)810*0e209d39SAndroid Build Coastguard Worker U_CAPI const LSTMData* U_EXPORT2 CreateLSTMData(UResourceBundle* rb, UErrorCode& status)
811*0e209d39SAndroid Build Coastguard Worker {
812*0e209d39SAndroid Build Coastguard Worker     return new LSTMData(rb, status);
813*0e209d39SAndroid Build Coastguard Worker }
814*0e209d39SAndroid Build Coastguard Worker 
815*0e209d39SAndroid Build Coastguard Worker U_CAPI const LanguageBreakEngine* U_EXPORT2
CreateLSTMBreakEngine(UScriptCode script,const LSTMData * data,UErrorCode & status)816*0e209d39SAndroid Build Coastguard Worker CreateLSTMBreakEngine(UScriptCode script, const LSTMData* data, UErrorCode& status)
817*0e209d39SAndroid Build Coastguard Worker {
818*0e209d39SAndroid Build Coastguard Worker     UnicodeString unicodeSetString;
819*0e209d39SAndroid Build Coastguard Worker     switch(script) {
820*0e209d39SAndroid Build Coastguard Worker         case USCRIPT_THAI:
821*0e209d39SAndroid Build Coastguard Worker             unicodeSetString = UnicodeString(u"[[:Thai:]&[:LineBreak=SA:]]");
822*0e209d39SAndroid Build Coastguard Worker             break;
823*0e209d39SAndroid Build Coastguard Worker         case USCRIPT_MYANMAR:
824*0e209d39SAndroid Build Coastguard Worker             unicodeSetString = UnicodeString(u"[[:Mymr:]&[:LineBreak=SA:]]");
825*0e209d39SAndroid Build Coastguard Worker             break;
826*0e209d39SAndroid Build Coastguard Worker         default:
827*0e209d39SAndroid Build Coastguard Worker             delete data;
828*0e209d39SAndroid Build Coastguard Worker             return nullptr;
829*0e209d39SAndroid Build Coastguard Worker     }
830*0e209d39SAndroid Build Coastguard Worker     UnicodeSet unicodeSet;
831*0e209d39SAndroid Build Coastguard Worker     unicodeSet.applyPattern(unicodeSetString, status);
832*0e209d39SAndroid Build Coastguard Worker     const LanguageBreakEngine* engine = new LSTMBreakEngine(data, unicodeSet, status);
833*0e209d39SAndroid Build Coastguard Worker     if (U_FAILURE(status) || engine == nullptr) {
834*0e209d39SAndroid Build Coastguard Worker         if (engine != nullptr) {
835*0e209d39SAndroid Build Coastguard Worker             delete engine;
836*0e209d39SAndroid Build Coastguard Worker         } else {
837*0e209d39SAndroid Build Coastguard Worker             status = U_MEMORY_ALLOCATION_ERROR;
838*0e209d39SAndroid Build Coastguard Worker         }
839*0e209d39SAndroid Build Coastguard Worker         return nullptr;
840*0e209d39SAndroid Build Coastguard Worker     }
841*0e209d39SAndroid Build Coastguard Worker     return engine;
842*0e209d39SAndroid Build Coastguard Worker }
843*0e209d39SAndroid Build Coastguard Worker 
DeleteLSTMData(const LSTMData * data)844*0e209d39SAndroid Build Coastguard Worker U_CAPI void U_EXPORT2 DeleteLSTMData(const LSTMData* data)
845*0e209d39SAndroid Build Coastguard Worker {
846*0e209d39SAndroid Build Coastguard Worker     delete data;
847*0e209d39SAndroid Build Coastguard Worker }
848*0e209d39SAndroid Build Coastguard Worker 
LSTMDataName(const LSTMData * data)849*0e209d39SAndroid Build Coastguard Worker U_CAPI const char16_t* U_EXPORT2 LSTMDataName(const LSTMData* data)
850*0e209d39SAndroid Build Coastguard Worker {
851*0e209d39SAndroid Build Coastguard Worker     return data->fName;
852*0e209d39SAndroid Build Coastguard Worker }
853*0e209d39SAndroid Build Coastguard Worker 
854*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_END
855*0e209d39SAndroid Build Coastguard Worker 
856*0e209d39SAndroid Build Coastguard Worker #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
857