xref: /aosp_15_r20/external/libtextclassifier/native/annotator/duration/duration.cc (revision 993b0882672172b81d12fad7a7ac0c3e5c824a12)
1*993b0882SAndroid Build Coastguard Worker /*
2*993b0882SAndroid Build Coastguard Worker  * Copyright (C) 2018 The Android Open Source Project
3*993b0882SAndroid Build Coastguard Worker  *
4*993b0882SAndroid Build Coastguard Worker  * Licensed under the Apache License, Version 2.0 (the "License");
5*993b0882SAndroid Build Coastguard Worker  * you may not use this file except in compliance with the License.
6*993b0882SAndroid Build Coastguard Worker  * You may obtain a copy of the License at
7*993b0882SAndroid Build Coastguard Worker  *
8*993b0882SAndroid Build Coastguard Worker  *      http://www.apache.org/licenses/LICENSE-2.0
9*993b0882SAndroid Build Coastguard Worker  *
10*993b0882SAndroid Build Coastguard Worker  * Unless required by applicable law or agreed to in writing, software
11*993b0882SAndroid Build Coastguard Worker  * distributed under the License is distributed on an "AS IS" BASIS,
12*993b0882SAndroid Build Coastguard Worker  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13*993b0882SAndroid Build Coastguard Worker  * See the License for the specific language governing permissions and
14*993b0882SAndroid Build Coastguard Worker  * limitations under the License.
15*993b0882SAndroid Build Coastguard Worker  */
16*993b0882SAndroid Build Coastguard Worker 
17*993b0882SAndroid Build Coastguard Worker #include "annotator/duration/duration.h"
18*993b0882SAndroid Build Coastguard Worker 
19*993b0882SAndroid Build Coastguard Worker #include <climits>
20*993b0882SAndroid Build Coastguard Worker #include <cstdlib>
21*993b0882SAndroid Build Coastguard Worker 
22*993b0882SAndroid Build Coastguard Worker #include "annotator/collections.h"
23*993b0882SAndroid Build Coastguard Worker #include "annotator/model_generated.h"
24*993b0882SAndroid Build Coastguard Worker #include "annotator/types.h"
25*993b0882SAndroid Build Coastguard Worker #include "utils/base/logging.h"
26*993b0882SAndroid Build Coastguard Worker #include "utils/base/macros.h"
27*993b0882SAndroid Build Coastguard Worker #include "utils/strings/numbers.h"
28*993b0882SAndroid Build Coastguard Worker #include "utils/utf8/unicodetext.h"
29*993b0882SAndroid Build Coastguard Worker 
30*993b0882SAndroid Build Coastguard Worker namespace libtextclassifier3 {
31*993b0882SAndroid Build Coastguard Worker 
32*993b0882SAndroid Build Coastguard Worker using DurationUnit = internal::DurationUnit;
33*993b0882SAndroid Build Coastguard Worker 
34*993b0882SAndroid Build Coastguard Worker namespace internal {
35*993b0882SAndroid Build Coastguard Worker 
36*993b0882SAndroid Build Coastguard Worker namespace {
ToLowerString(const std::string & str,const UniLib * unilib)37*993b0882SAndroid Build Coastguard Worker std::string ToLowerString(const std::string& str, const UniLib* unilib) {
38*993b0882SAndroid Build Coastguard Worker   return unilib->ToLowerText(UTF8ToUnicodeText(str, /*do_copy=*/false))
39*993b0882SAndroid Build Coastguard Worker       .ToUTF8String();
40*993b0882SAndroid Build Coastguard Worker }
41*993b0882SAndroid Build Coastguard Worker 
FillDurationUnitMap(const flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>> * expressions,DurationUnit duration_unit,std::unordered_map<std::string,DurationUnit> * target_map,const UniLib * unilib)42*993b0882SAndroid Build Coastguard Worker void FillDurationUnitMap(
43*993b0882SAndroid Build Coastguard Worker     const flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>>*
44*993b0882SAndroid Build Coastguard Worker         expressions,
45*993b0882SAndroid Build Coastguard Worker     DurationUnit duration_unit,
46*993b0882SAndroid Build Coastguard Worker     std::unordered_map<std::string, DurationUnit>* target_map,
47*993b0882SAndroid Build Coastguard Worker     const UniLib* unilib) {
48*993b0882SAndroid Build Coastguard Worker   if (expressions == nullptr) {
49*993b0882SAndroid Build Coastguard Worker     return;
50*993b0882SAndroid Build Coastguard Worker   }
51*993b0882SAndroid Build Coastguard Worker 
52*993b0882SAndroid Build Coastguard Worker   for (const flatbuffers::String* expression_string : *expressions) {
53*993b0882SAndroid Build Coastguard Worker     (*target_map)[ToLowerString(expression_string->c_str(), unilib)] =
54*993b0882SAndroid Build Coastguard Worker         duration_unit;
55*993b0882SAndroid Build Coastguard Worker   }
56*993b0882SAndroid Build Coastguard Worker }
57*993b0882SAndroid Build Coastguard Worker }  // namespace
58*993b0882SAndroid Build Coastguard Worker 
BuildTokenToDurationUnitMapping(const DurationAnnotatorOptions * options,const UniLib * unilib)59*993b0882SAndroid Build Coastguard Worker std::unordered_map<std::string, DurationUnit> BuildTokenToDurationUnitMapping(
60*993b0882SAndroid Build Coastguard Worker     const DurationAnnotatorOptions* options, const UniLib* unilib) {
61*993b0882SAndroid Build Coastguard Worker   std::unordered_map<std::string, DurationUnit> mapping;
62*993b0882SAndroid Build Coastguard Worker   FillDurationUnitMap(options->week_expressions(), DurationUnit::WEEK, &mapping,
63*993b0882SAndroid Build Coastguard Worker                       unilib);
64*993b0882SAndroid Build Coastguard Worker   FillDurationUnitMap(options->day_expressions(), DurationUnit::DAY, &mapping,
65*993b0882SAndroid Build Coastguard Worker                       unilib);
66*993b0882SAndroid Build Coastguard Worker   FillDurationUnitMap(options->hour_expressions(), DurationUnit::HOUR, &mapping,
67*993b0882SAndroid Build Coastguard Worker                       unilib);
68*993b0882SAndroid Build Coastguard Worker   FillDurationUnitMap(options->minute_expressions(), DurationUnit::MINUTE,
69*993b0882SAndroid Build Coastguard Worker                       &mapping, unilib);
70*993b0882SAndroid Build Coastguard Worker   FillDurationUnitMap(options->second_expressions(), DurationUnit::SECOND,
71*993b0882SAndroid Build Coastguard Worker                       &mapping, unilib);
72*993b0882SAndroid Build Coastguard Worker   return mapping;
73*993b0882SAndroid Build Coastguard Worker }
74*993b0882SAndroid Build Coastguard Worker 
BuildStringSet(const flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>> * strings,const UniLib * unilib)75*993b0882SAndroid Build Coastguard Worker std::unordered_set<std::string> BuildStringSet(
76*993b0882SAndroid Build Coastguard Worker     const flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>>*
77*993b0882SAndroid Build Coastguard Worker         strings,
78*993b0882SAndroid Build Coastguard Worker     const UniLib* unilib) {
79*993b0882SAndroid Build Coastguard Worker   std::unordered_set<std::string> result;
80*993b0882SAndroid Build Coastguard Worker   if (strings == nullptr) {
81*993b0882SAndroid Build Coastguard Worker     return result;
82*993b0882SAndroid Build Coastguard Worker   }
83*993b0882SAndroid Build Coastguard Worker 
84*993b0882SAndroid Build Coastguard Worker   for (const flatbuffers::String* string_value : *strings) {
85*993b0882SAndroid Build Coastguard Worker     result.insert(ToLowerString(string_value->c_str(), unilib));
86*993b0882SAndroid Build Coastguard Worker   }
87*993b0882SAndroid Build Coastguard Worker 
88*993b0882SAndroid Build Coastguard Worker   return result;
89*993b0882SAndroid Build Coastguard Worker }
90*993b0882SAndroid Build Coastguard Worker 
BuildInt32Set(const flatbuffers::Vector<int32> * ints)91*993b0882SAndroid Build Coastguard Worker std::unordered_set<int32> BuildInt32Set(
92*993b0882SAndroid Build Coastguard Worker     const flatbuffers::Vector<int32>* ints) {
93*993b0882SAndroid Build Coastguard Worker   std::unordered_set<int32> result;
94*993b0882SAndroid Build Coastguard Worker   if (ints == nullptr) {
95*993b0882SAndroid Build Coastguard Worker     return result;
96*993b0882SAndroid Build Coastguard Worker   }
97*993b0882SAndroid Build Coastguard Worker 
98*993b0882SAndroid Build Coastguard Worker   for (const int32 int_value : *ints) {
99*993b0882SAndroid Build Coastguard Worker     result.insert(int_value);
100*993b0882SAndroid Build Coastguard Worker   }
101*993b0882SAndroid Build Coastguard Worker 
102*993b0882SAndroid Build Coastguard Worker   return result;
103*993b0882SAndroid Build Coastguard Worker }
104*993b0882SAndroid Build Coastguard Worker 
105*993b0882SAndroid Build Coastguard Worker // Get the dangling quantity unit e.g. for 2 hours 10, 10 would have the unit
106*993b0882SAndroid Build Coastguard Worker // "minute".
GetDanglingQuantityUnit(const DurationUnit main_unit)107*993b0882SAndroid Build Coastguard Worker DurationUnit GetDanglingQuantityUnit(const DurationUnit main_unit) {
108*993b0882SAndroid Build Coastguard Worker   switch (main_unit) {
109*993b0882SAndroid Build Coastguard Worker     case DurationUnit::HOUR:
110*993b0882SAndroid Build Coastguard Worker       return DurationUnit::MINUTE;
111*993b0882SAndroid Build Coastguard Worker     case DurationUnit::MINUTE:
112*993b0882SAndroid Build Coastguard Worker       return DurationUnit::SECOND;
113*993b0882SAndroid Build Coastguard Worker     case DurationUnit::UNKNOWN:
114*993b0882SAndroid Build Coastguard Worker       TC3_LOG(ERROR) << "Requesting parse of UNKNOWN duration duration_unit.";
115*993b0882SAndroid Build Coastguard Worker       TC3_FALLTHROUGH_INTENDED;
116*993b0882SAndroid Build Coastguard Worker     case DurationUnit::WEEK:
117*993b0882SAndroid Build Coastguard Worker     case DurationUnit::DAY:
118*993b0882SAndroid Build Coastguard Worker     case DurationUnit::SECOND:
119*993b0882SAndroid Build Coastguard Worker       // We only support dangling units for hours and minutes.
120*993b0882SAndroid Build Coastguard Worker       return DurationUnit::UNKNOWN;
121*993b0882SAndroid Build Coastguard Worker   }
122*993b0882SAndroid Build Coastguard Worker }
123*993b0882SAndroid Build Coastguard Worker }  // namespace internal
124*993b0882SAndroid Build Coastguard Worker 
ClassifyText(const UnicodeText & context,CodepointSpan selection_indices,AnnotationUsecase annotation_usecase,ClassificationResult * classification_result) const125*993b0882SAndroid Build Coastguard Worker bool DurationAnnotator::ClassifyText(
126*993b0882SAndroid Build Coastguard Worker     const UnicodeText& context, CodepointSpan selection_indices,
127*993b0882SAndroid Build Coastguard Worker     AnnotationUsecase annotation_usecase,
128*993b0882SAndroid Build Coastguard Worker     ClassificationResult* classification_result) const {
129*993b0882SAndroid Build Coastguard Worker   if (!options_->enabled() ||
130*993b0882SAndroid Build Coastguard Worker       ((options_->enabled_annotation_usecases() & (1 << annotation_usecase))) ==
131*993b0882SAndroid Build Coastguard Worker           0 ||
132*993b0882SAndroid Build Coastguard Worker       !(options_->enabled_modes() & ModeFlag_CLASSIFICATION)) {
133*993b0882SAndroid Build Coastguard Worker     return false;
134*993b0882SAndroid Build Coastguard Worker   }
135*993b0882SAndroid Build Coastguard Worker 
136*993b0882SAndroid Build Coastguard Worker   const UnicodeText selection =
137*993b0882SAndroid Build Coastguard Worker       UnicodeText::Substring(context, selection_indices.first,
138*993b0882SAndroid Build Coastguard Worker                              selection_indices.second, /*do_copy=*/false);
139*993b0882SAndroid Build Coastguard Worker   const std::vector<Token> tokens = feature_processor_->Tokenize(selection);
140*993b0882SAndroid Build Coastguard Worker 
141*993b0882SAndroid Build Coastguard Worker   AnnotatedSpan annotated_span;
142*993b0882SAndroid Build Coastguard Worker   if (tokens.empty() ||
143*993b0882SAndroid Build Coastguard Worker       FindDurationStartingAt(context, tokens, 0, &annotated_span) !=
144*993b0882SAndroid Build Coastguard Worker           tokens.size()) {
145*993b0882SAndroid Build Coastguard Worker     return false;
146*993b0882SAndroid Build Coastguard Worker   }
147*993b0882SAndroid Build Coastguard Worker 
148*993b0882SAndroid Build Coastguard Worker   TC3_DCHECK(!annotated_span.classification.empty());
149*993b0882SAndroid Build Coastguard Worker 
150*993b0882SAndroid Build Coastguard Worker   *classification_result = annotated_span.classification[0];
151*993b0882SAndroid Build Coastguard Worker   return true;
152*993b0882SAndroid Build Coastguard Worker }
153*993b0882SAndroid Build Coastguard Worker 
FindAll(const UnicodeText & context,const std::vector<Token> & tokens,AnnotationUsecase annotation_usecase,ModeFlag mode,std::vector<AnnotatedSpan> * results) const154*993b0882SAndroid Build Coastguard Worker bool DurationAnnotator::FindAll(const UnicodeText& context,
155*993b0882SAndroid Build Coastguard Worker                                 const std::vector<Token>& tokens,
156*993b0882SAndroid Build Coastguard Worker                                 AnnotationUsecase annotation_usecase,
157*993b0882SAndroid Build Coastguard Worker                                 ModeFlag mode,
158*993b0882SAndroid Build Coastguard Worker                                 std::vector<AnnotatedSpan>* results) const {
159*993b0882SAndroid Build Coastguard Worker   if (!options_->enabled() ||
160*993b0882SAndroid Build Coastguard Worker       ((options_->enabled_annotation_usecases() & (1 << annotation_usecase))) ==
161*993b0882SAndroid Build Coastguard Worker           0 ||
162*993b0882SAndroid Build Coastguard Worker       !(options_->enabled_modes() & mode)) {
163*993b0882SAndroid Build Coastguard Worker     return true;
164*993b0882SAndroid Build Coastguard Worker   }
165*993b0882SAndroid Build Coastguard Worker 
166*993b0882SAndroid Build Coastguard Worker   for (int i = 0; i < tokens.size();) {
167*993b0882SAndroid Build Coastguard Worker     AnnotatedSpan span;
168*993b0882SAndroid Build Coastguard Worker     const int next_i = FindDurationStartingAt(context, tokens, i, &span);
169*993b0882SAndroid Build Coastguard Worker     if (next_i != i) {
170*993b0882SAndroid Build Coastguard Worker       results->push_back(span);
171*993b0882SAndroid Build Coastguard Worker       i = next_i;
172*993b0882SAndroid Build Coastguard Worker     } else {
173*993b0882SAndroid Build Coastguard Worker       i++;
174*993b0882SAndroid Build Coastguard Worker     }
175*993b0882SAndroid Build Coastguard Worker   }
176*993b0882SAndroid Build Coastguard Worker   return true;
177*993b0882SAndroid Build Coastguard Worker }
178*993b0882SAndroid Build Coastguard Worker 
FindDurationStartingAt(const UnicodeText & context,const std::vector<Token> & tokens,int start_token_index,AnnotatedSpan * result) const179*993b0882SAndroid Build Coastguard Worker int DurationAnnotator::FindDurationStartingAt(const UnicodeText& context,
180*993b0882SAndroid Build Coastguard Worker                                               const std::vector<Token>& tokens,
181*993b0882SAndroid Build Coastguard Worker                                               int start_token_index,
182*993b0882SAndroid Build Coastguard Worker                                               AnnotatedSpan* result) const {
183*993b0882SAndroid Build Coastguard Worker   CodepointIndex start_index = kInvalidIndex;
184*993b0882SAndroid Build Coastguard Worker   CodepointIndex end_index = kInvalidIndex;
185*993b0882SAndroid Build Coastguard Worker 
186*993b0882SAndroid Build Coastguard Worker   bool has_quantity = false;
187*993b0882SAndroid Build Coastguard Worker   ParsedDurationAtom parsed_duration;
188*993b0882SAndroid Build Coastguard Worker 
189*993b0882SAndroid Build Coastguard Worker   std::vector<ParsedDurationAtom> parsed_duration_atoms;
190*993b0882SAndroid Build Coastguard Worker 
191*993b0882SAndroid Build Coastguard Worker   // This is the core algorithm for finding the duration expressions. It
192*993b0882SAndroid Build Coastguard Worker   // basically iterates over tokens and changes the state variables above as it
193*993b0882SAndroid Build Coastguard Worker   // goes.
194*993b0882SAndroid Build Coastguard Worker   int token_index;
195*993b0882SAndroid Build Coastguard Worker   int quantity_end_index;
196*993b0882SAndroid Build Coastguard Worker   for (token_index = start_token_index; token_index < tokens.size();
197*993b0882SAndroid Build Coastguard Worker        token_index++) {
198*993b0882SAndroid Build Coastguard Worker     const Token& token = tokens[token_index];
199*993b0882SAndroid Build Coastguard Worker 
200*993b0882SAndroid Build Coastguard Worker     if (ParseQuantityToken(token, &parsed_duration)) {
201*993b0882SAndroid Build Coastguard Worker       has_quantity = true;
202*993b0882SAndroid Build Coastguard Worker       if (start_index == kInvalidIndex) {
203*993b0882SAndroid Build Coastguard Worker         start_index = token.start;
204*993b0882SAndroid Build Coastguard Worker       }
205*993b0882SAndroid Build Coastguard Worker       quantity_end_index = token.end;
206*993b0882SAndroid Build Coastguard Worker     } else if (((!options_->require_quantity() || has_quantity) &&
207*993b0882SAndroid Build Coastguard Worker                 ParseDurationUnitToken(token, &parsed_duration.unit)) ||
208*993b0882SAndroid Build Coastguard Worker                ParseQuantityDurationUnitToken(token, &parsed_duration)) {
209*993b0882SAndroid Build Coastguard Worker       if (start_index == kInvalidIndex) {
210*993b0882SAndroid Build Coastguard Worker         start_index = token.start;
211*993b0882SAndroid Build Coastguard Worker       }
212*993b0882SAndroid Build Coastguard Worker       end_index = token.end;
213*993b0882SAndroid Build Coastguard Worker       parsed_duration_atoms.push_back(parsed_duration);
214*993b0882SAndroid Build Coastguard Worker       has_quantity = false;
215*993b0882SAndroid Build Coastguard Worker       parsed_duration = ParsedDurationAtom();
216*993b0882SAndroid Build Coastguard Worker     } else if (ParseFillerToken(token)) {
217*993b0882SAndroid Build Coastguard Worker     } else {
218*993b0882SAndroid Build Coastguard Worker       break;
219*993b0882SAndroid Build Coastguard Worker     }
220*993b0882SAndroid Build Coastguard Worker   }
221*993b0882SAndroid Build Coastguard Worker 
222*993b0882SAndroid Build Coastguard Worker   if (parsed_duration_atoms.empty()) {
223*993b0882SAndroid Build Coastguard Worker     return start_token_index;
224*993b0882SAndroid Build Coastguard Worker   }
225*993b0882SAndroid Build Coastguard Worker 
226*993b0882SAndroid Build Coastguard Worker   const bool parse_ended_without_unit_for_last_mentioned_quantity =
227*993b0882SAndroid Build Coastguard Worker       has_quantity;
228*993b0882SAndroid Build Coastguard Worker 
229*993b0882SAndroid Build Coastguard Worker   if (parse_ended_without_unit_for_last_mentioned_quantity) {
230*993b0882SAndroid Build Coastguard Worker     const DurationUnit main_unit = parsed_duration_atoms.rbegin()->unit;
231*993b0882SAndroid Build Coastguard Worker     if (parsed_duration.plus_half) {
232*993b0882SAndroid Build Coastguard Worker       // Process "and half" suffix.
233*993b0882SAndroid Build Coastguard Worker       end_index = quantity_end_index;
234*993b0882SAndroid Build Coastguard Worker       ParsedDurationAtom atom = ParsedDurationAtom::Half();
235*993b0882SAndroid Build Coastguard Worker       atom.unit = main_unit;
236*993b0882SAndroid Build Coastguard Worker       parsed_duration_atoms.push_back(atom);
237*993b0882SAndroid Build Coastguard Worker     } else if (options_->enable_dangling_quantity_interpretation()) {
238*993b0882SAndroid Build Coastguard Worker       // Process dangling quantity.
239*993b0882SAndroid Build Coastguard Worker       ParsedDurationAtom atom;
240*993b0882SAndroid Build Coastguard Worker       atom.value = parsed_duration.value;
241*993b0882SAndroid Build Coastguard Worker       atom.unit = GetDanglingQuantityUnit(main_unit);
242*993b0882SAndroid Build Coastguard Worker       if (atom.unit != DurationUnit::UNKNOWN) {
243*993b0882SAndroid Build Coastguard Worker         end_index = quantity_end_index;
244*993b0882SAndroid Build Coastguard Worker         parsed_duration_atoms.push_back(atom);
245*993b0882SAndroid Build Coastguard Worker       }
246*993b0882SAndroid Build Coastguard Worker     }
247*993b0882SAndroid Build Coastguard Worker   }
248*993b0882SAndroid Build Coastguard Worker 
249*993b0882SAndroid Build Coastguard Worker   ClassificationResult classification{Collections::Duration(),
250*993b0882SAndroid Build Coastguard Worker                                       options_->score()};
251*993b0882SAndroid Build Coastguard Worker   classification.priority_score = options_->priority_score();
252*993b0882SAndroid Build Coastguard Worker   classification.duration_ms =
253*993b0882SAndroid Build Coastguard Worker       ParsedDurationAtomsToMillis(parsed_duration_atoms);
254*993b0882SAndroid Build Coastguard Worker 
255*993b0882SAndroid Build Coastguard Worker   result->span = feature_processor_->StripBoundaryCodepoints(
256*993b0882SAndroid Build Coastguard Worker       context, {start_index, end_index});
257*993b0882SAndroid Build Coastguard Worker   result->classification.push_back(classification);
258*993b0882SAndroid Build Coastguard Worker   result->source = AnnotatedSpan::Source::DURATION;
259*993b0882SAndroid Build Coastguard Worker 
260*993b0882SAndroid Build Coastguard Worker   return token_index;
261*993b0882SAndroid Build Coastguard Worker }
262*993b0882SAndroid Build Coastguard Worker 
ParsedDurationAtomsToMillis(const std::vector<ParsedDurationAtom> & atoms) const263*993b0882SAndroid Build Coastguard Worker int64 DurationAnnotator::ParsedDurationAtomsToMillis(
264*993b0882SAndroid Build Coastguard Worker     const std::vector<ParsedDurationAtom>& atoms) const {
265*993b0882SAndroid Build Coastguard Worker   int64 result = 0;
266*993b0882SAndroid Build Coastguard Worker   for (auto atom : atoms) {
267*993b0882SAndroid Build Coastguard Worker     int multiplier;
268*993b0882SAndroid Build Coastguard Worker     switch (atom.unit) {
269*993b0882SAndroid Build Coastguard Worker       case DurationUnit::WEEK:
270*993b0882SAndroid Build Coastguard Worker         multiplier = 7 * 24 * 60 * 60 * 1000;
271*993b0882SAndroid Build Coastguard Worker         break;
272*993b0882SAndroid Build Coastguard Worker       case DurationUnit::DAY:
273*993b0882SAndroid Build Coastguard Worker         multiplier = 24 * 60 * 60 * 1000;
274*993b0882SAndroid Build Coastguard Worker         break;
275*993b0882SAndroid Build Coastguard Worker       case DurationUnit::HOUR:
276*993b0882SAndroid Build Coastguard Worker         multiplier = 60 * 60 * 1000;
277*993b0882SAndroid Build Coastguard Worker         break;
278*993b0882SAndroid Build Coastguard Worker       case DurationUnit::MINUTE:
279*993b0882SAndroid Build Coastguard Worker         multiplier = 60 * 1000;
280*993b0882SAndroid Build Coastguard Worker         break;
281*993b0882SAndroid Build Coastguard Worker       case DurationUnit::SECOND:
282*993b0882SAndroid Build Coastguard Worker         multiplier = 1000;
283*993b0882SAndroid Build Coastguard Worker         break;
284*993b0882SAndroid Build Coastguard Worker       case DurationUnit::UNKNOWN:
285*993b0882SAndroid Build Coastguard Worker         TC3_LOG(ERROR) << "Requesting parse of UNKNOWN duration duration_unit.";
286*993b0882SAndroid Build Coastguard Worker         return -1;
287*993b0882SAndroid Build Coastguard Worker         break;
288*993b0882SAndroid Build Coastguard Worker     }
289*993b0882SAndroid Build Coastguard Worker 
290*993b0882SAndroid Build Coastguard Worker     double value = atom.value;
291*993b0882SAndroid Build Coastguard Worker     // This condition handles expressions like "an hour", where the quantity is
292*993b0882SAndroid Build Coastguard Worker     // not specified. In this case we assume quantity 1. Except for cases like
293*993b0882SAndroid Build Coastguard Worker     // "half hour".
294*993b0882SAndroid Build Coastguard Worker     if (value == 0 && !atom.plus_half) {
295*993b0882SAndroid Build Coastguard Worker       value = 1;
296*993b0882SAndroid Build Coastguard Worker     }
297*993b0882SAndroid Build Coastguard Worker     result += value * multiplier;
298*993b0882SAndroid Build Coastguard Worker     result += atom.plus_half * multiplier / 2;
299*993b0882SAndroid Build Coastguard Worker   }
300*993b0882SAndroid Build Coastguard Worker   return result;
301*993b0882SAndroid Build Coastguard Worker }
302*993b0882SAndroid Build Coastguard Worker 
ParseQuantityToken(const Token & token,ParsedDurationAtom * value) const303*993b0882SAndroid Build Coastguard Worker bool DurationAnnotator::ParseQuantityToken(const Token& token,
304*993b0882SAndroid Build Coastguard Worker                                            ParsedDurationAtom* value) const {
305*993b0882SAndroid Build Coastguard Worker   if (token.value.empty()) {
306*993b0882SAndroid Build Coastguard Worker     return false;
307*993b0882SAndroid Build Coastguard Worker   }
308*993b0882SAndroid Build Coastguard Worker 
309*993b0882SAndroid Build Coastguard Worker   std::string token_value_buffer;
310*993b0882SAndroid Build Coastguard Worker   const std::string& token_value = feature_processor_->StripBoundaryCodepoints(
311*993b0882SAndroid Build Coastguard Worker       token.value, &token_value_buffer);
312*993b0882SAndroid Build Coastguard Worker   const std::string& lowercase_token_value =
313*993b0882SAndroid Build Coastguard Worker       internal::ToLowerString(token_value, unilib_);
314*993b0882SAndroid Build Coastguard Worker 
315*993b0882SAndroid Build Coastguard Worker   if (half_expressions_.find(lowercase_token_value) !=
316*993b0882SAndroid Build Coastguard Worker       half_expressions_.end()) {
317*993b0882SAndroid Build Coastguard Worker     value->plus_half = true;
318*993b0882SAndroid Build Coastguard Worker     return true;
319*993b0882SAndroid Build Coastguard Worker   }
320*993b0882SAndroid Build Coastguard Worker 
321*993b0882SAndroid Build Coastguard Worker   double parsed_value;
322*993b0882SAndroid Build Coastguard Worker   if (ParseDouble(lowercase_token_value.c_str(), &parsed_value)) {
323*993b0882SAndroid Build Coastguard Worker     value->value = parsed_value;
324*993b0882SAndroid Build Coastguard Worker     return true;
325*993b0882SAndroid Build Coastguard Worker   }
326*993b0882SAndroid Build Coastguard Worker 
327*993b0882SAndroid Build Coastguard Worker   return false;
328*993b0882SAndroid Build Coastguard Worker }
329*993b0882SAndroid Build Coastguard Worker 
ParseDurationUnitToken(const Token & token,DurationUnit * duration_unit) const330*993b0882SAndroid Build Coastguard Worker bool DurationAnnotator::ParseDurationUnitToken(
331*993b0882SAndroid Build Coastguard Worker     const Token& token, DurationUnit* duration_unit) const {
332*993b0882SAndroid Build Coastguard Worker   std::string token_value_buffer;
333*993b0882SAndroid Build Coastguard Worker   const std::string& token_value = feature_processor_->StripBoundaryCodepoints(
334*993b0882SAndroid Build Coastguard Worker       token.value, &token_value_buffer);
335*993b0882SAndroid Build Coastguard Worker   const std::string& lowercase_token_value =
336*993b0882SAndroid Build Coastguard Worker       internal::ToLowerString(token_value, unilib_);
337*993b0882SAndroid Build Coastguard Worker 
338*993b0882SAndroid Build Coastguard Worker   const auto it = token_value_to_duration_unit_.find(lowercase_token_value);
339*993b0882SAndroid Build Coastguard Worker   if (it == token_value_to_duration_unit_.end()) {
340*993b0882SAndroid Build Coastguard Worker     return false;
341*993b0882SAndroid Build Coastguard Worker   }
342*993b0882SAndroid Build Coastguard Worker 
343*993b0882SAndroid Build Coastguard Worker   *duration_unit = it->second;
344*993b0882SAndroid Build Coastguard Worker   return true;
345*993b0882SAndroid Build Coastguard Worker }
346*993b0882SAndroid Build Coastguard Worker 
ParseQuantityDurationUnitToken(const Token & token,ParsedDurationAtom * value) const347*993b0882SAndroid Build Coastguard Worker bool DurationAnnotator::ParseQuantityDurationUnitToken(
348*993b0882SAndroid Build Coastguard Worker     const Token& token, ParsedDurationAtom* value) const {
349*993b0882SAndroid Build Coastguard Worker   if (token.value.empty()) {
350*993b0882SAndroid Build Coastguard Worker     return false;
351*993b0882SAndroid Build Coastguard Worker   }
352*993b0882SAndroid Build Coastguard Worker 
353*993b0882SAndroid Build Coastguard Worker   Token sub_token;
354*993b0882SAndroid Build Coastguard Worker   bool has_quantity = false;
355*993b0882SAndroid Build Coastguard Worker   for (const char c : token.value) {
356*993b0882SAndroid Build Coastguard Worker     if (sub_token_separator_codepoints_.find(c) !=
357*993b0882SAndroid Build Coastguard Worker         sub_token_separator_codepoints_.end()) {
358*993b0882SAndroid Build Coastguard Worker       if (has_quantity || !ParseQuantityToken(sub_token, value)) {
359*993b0882SAndroid Build Coastguard Worker         return false;
360*993b0882SAndroid Build Coastguard Worker       }
361*993b0882SAndroid Build Coastguard Worker       has_quantity = true;
362*993b0882SAndroid Build Coastguard Worker 
363*993b0882SAndroid Build Coastguard Worker       sub_token = Token();
364*993b0882SAndroid Build Coastguard Worker     } else {
365*993b0882SAndroid Build Coastguard Worker       sub_token.value += c;
366*993b0882SAndroid Build Coastguard Worker     }
367*993b0882SAndroid Build Coastguard Worker   }
368*993b0882SAndroid Build Coastguard Worker 
369*993b0882SAndroid Build Coastguard Worker   return (!options_->require_quantity() || has_quantity) &&
370*993b0882SAndroid Build Coastguard Worker          ParseDurationUnitToken(sub_token, &(value->unit));
371*993b0882SAndroid Build Coastguard Worker }
372*993b0882SAndroid Build Coastguard Worker 
ParseFillerToken(const Token & token) const373*993b0882SAndroid Build Coastguard Worker bool DurationAnnotator::ParseFillerToken(const Token& token) const {
374*993b0882SAndroid Build Coastguard Worker   std::string token_value_buffer;
375*993b0882SAndroid Build Coastguard Worker   const std::string& token_value = feature_processor_->StripBoundaryCodepoints(
376*993b0882SAndroid Build Coastguard Worker       token.value, &token_value_buffer);
377*993b0882SAndroid Build Coastguard Worker   const std::string& lowercase_token_value =
378*993b0882SAndroid Build Coastguard Worker       internal::ToLowerString(token_value, unilib_);
379*993b0882SAndroid Build Coastguard Worker 
380*993b0882SAndroid Build Coastguard Worker   if (filler_expressions_.find(lowercase_token_value) ==
381*993b0882SAndroid Build Coastguard Worker       filler_expressions_.end()) {
382*993b0882SAndroid Build Coastguard Worker     return false;
383*993b0882SAndroid Build Coastguard Worker   }
384*993b0882SAndroid Build Coastguard Worker 
385*993b0882SAndroid Build Coastguard Worker   return true;
386*993b0882SAndroid Build Coastguard Worker }
387*993b0882SAndroid Build Coastguard Worker 
388*993b0882SAndroid Build Coastguard Worker }  // namespace libtextclassifier3
389