1*993b0882SAndroid Build Coastguard Worker /*
2*993b0882SAndroid Build Coastguard Worker * Copyright (C) 2018 The Android Open Source Project
3*993b0882SAndroid Build Coastguard Worker *
4*993b0882SAndroid Build Coastguard Worker * Licensed under the Apache License, Version 2.0 (the "License");
5*993b0882SAndroid Build Coastguard Worker * you may not use this file except in compliance with the License.
6*993b0882SAndroid Build Coastguard Worker * You may obtain a copy of the License at
7*993b0882SAndroid Build Coastguard Worker *
8*993b0882SAndroid Build Coastguard Worker * http://www.apache.org/licenses/LICENSE-2.0
9*993b0882SAndroid Build Coastguard Worker *
10*993b0882SAndroid Build Coastguard Worker * Unless required by applicable law or agreed to in writing, software
11*993b0882SAndroid Build Coastguard Worker * distributed under the License is distributed on an "AS IS" BASIS,
12*993b0882SAndroid Build Coastguard Worker * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13*993b0882SAndroid Build Coastguard Worker * See the License for the specific language governing permissions and
14*993b0882SAndroid Build Coastguard Worker * limitations under the License.
15*993b0882SAndroid Build Coastguard Worker */
16*993b0882SAndroid Build Coastguard Worker
17*993b0882SAndroid Build Coastguard Worker #include "annotator/datetime/grammar-parser.h"
18*993b0882SAndroid Build Coastguard Worker
19*993b0882SAndroid Build Coastguard Worker #include <set>
20*993b0882SAndroid Build Coastguard Worker #include <unordered_set>
21*993b0882SAndroid Build Coastguard Worker
22*993b0882SAndroid Build Coastguard Worker #include "annotator/datetime/datetime-grounder.h"
23*993b0882SAndroid Build Coastguard Worker #include "annotator/model_generated.h"
24*993b0882SAndroid Build Coastguard Worker #include "annotator/types.h"
25*993b0882SAndroid Build Coastguard Worker #include "utils/grammar/analyzer.h"
26*993b0882SAndroid Build Coastguard Worker #include "utils/grammar/evaluated-derivation.h"
27*993b0882SAndroid Build Coastguard Worker #include "utils/grammar/parsing/derivation.h"
28*993b0882SAndroid Build Coastguard Worker
29*993b0882SAndroid Build Coastguard Worker using ::libtextclassifier3::grammar::EvaluatedDerivation;
30*993b0882SAndroid Build Coastguard Worker using ::libtextclassifier3::grammar::datetime::UngroundedDatetime;
31*993b0882SAndroid Build Coastguard Worker
32*993b0882SAndroid Build Coastguard Worker namespace libtextclassifier3 {
33*993b0882SAndroid Build Coastguard Worker
GrammarDatetimeParser(const grammar::Analyzer & analyzer,const DatetimeGrounder & datetime_grounder,const float target_classification_score,const float priority_score,ModeFlag enabled_modes)34*993b0882SAndroid Build Coastguard Worker GrammarDatetimeParser::GrammarDatetimeParser(
35*993b0882SAndroid Build Coastguard Worker const grammar::Analyzer& analyzer,
36*993b0882SAndroid Build Coastguard Worker const DatetimeGrounder& datetime_grounder,
37*993b0882SAndroid Build Coastguard Worker const float target_classification_score, const float priority_score,
38*993b0882SAndroid Build Coastguard Worker ModeFlag enabled_modes)
39*993b0882SAndroid Build Coastguard Worker : analyzer_(analyzer),
40*993b0882SAndroid Build Coastguard Worker datetime_grounder_(datetime_grounder),
41*993b0882SAndroid Build Coastguard Worker target_classification_score_(target_classification_score),
42*993b0882SAndroid Build Coastguard Worker priority_score_(priority_score),
43*993b0882SAndroid Build Coastguard Worker enabled_modes_(enabled_modes) {}
44*993b0882SAndroid Build Coastguard Worker
Parse(const std::string & input,const int64 reference_time_ms_utc,const std::string & reference_timezone,const LocaleList & locale_list,ModeFlag mode,AnnotationUsecase annotation_usecase,bool anchor_start_end) const45*993b0882SAndroid Build Coastguard Worker StatusOr<std::vector<DatetimeParseResultSpan>> GrammarDatetimeParser::Parse(
46*993b0882SAndroid Build Coastguard Worker const std::string& input, const int64 reference_time_ms_utc,
47*993b0882SAndroid Build Coastguard Worker const std::string& reference_timezone, const LocaleList& locale_list,
48*993b0882SAndroid Build Coastguard Worker ModeFlag mode, AnnotationUsecase annotation_usecase,
49*993b0882SAndroid Build Coastguard Worker bool anchor_start_end) const {
50*993b0882SAndroid Build Coastguard Worker return Parse(UTF8ToUnicodeText(input, /*do_copy=*/false),
51*993b0882SAndroid Build Coastguard Worker reference_time_ms_utc, reference_timezone, locale_list, mode,
52*993b0882SAndroid Build Coastguard Worker annotation_usecase, anchor_start_end);
53*993b0882SAndroid Build Coastguard Worker }
54*993b0882SAndroid Build Coastguard Worker
Parse(const UnicodeText & input,const int64 reference_time_ms_utc,const std::string & reference_timezone,const LocaleList & locale_list,ModeFlag mode,AnnotationUsecase annotation_usecase,bool anchor_start_end) const55*993b0882SAndroid Build Coastguard Worker StatusOr<std::vector<DatetimeParseResultSpan>> GrammarDatetimeParser::Parse(
56*993b0882SAndroid Build Coastguard Worker const UnicodeText& input, const int64 reference_time_ms_utc,
57*993b0882SAndroid Build Coastguard Worker const std::string& reference_timezone, const LocaleList& locale_list,
58*993b0882SAndroid Build Coastguard Worker ModeFlag mode, AnnotationUsecase annotation_usecase,
59*993b0882SAndroid Build Coastguard Worker bool anchor_start_end) const {
60*993b0882SAndroid Build Coastguard Worker if (!(enabled_modes_ & mode)) {
61*993b0882SAndroid Build Coastguard Worker return std::vector<DatetimeParseResultSpan>();
62*993b0882SAndroid Build Coastguard Worker }
63*993b0882SAndroid Build Coastguard Worker
64*993b0882SAndroid Build Coastguard Worker std::vector<DatetimeParseResultSpan> results;
65*993b0882SAndroid Build Coastguard Worker UnsafeArena arena(/*block_size=*/16 << 10);
66*993b0882SAndroid Build Coastguard Worker std::vector<Locale> locales = locale_list.GetLocales();
67*993b0882SAndroid Build Coastguard Worker // If the locale list is empty then datetime regex expression will still
68*993b0882SAndroid Build Coastguard Worker // execute but in grammar based parser the rules are associated with local
69*993b0882SAndroid Build Coastguard Worker // and engine will not run if the locale list is empty. In an unlikely
70*993b0882SAndroid Build Coastguard Worker // scenario when locale is not mentioned fallback to en-*.
71*993b0882SAndroid Build Coastguard Worker if (locales.empty()) {
72*993b0882SAndroid Build Coastguard Worker locales.emplace_back(Locale::FromBCP47("en"));
73*993b0882SAndroid Build Coastguard Worker }
74*993b0882SAndroid Build Coastguard Worker TC3_ASSIGN_OR_RETURN(
75*993b0882SAndroid Build Coastguard Worker const std::vector<EvaluatedDerivation> evaluated_derivations,
76*993b0882SAndroid Build Coastguard Worker analyzer_.Parse(input, locales, &arena,
77*993b0882SAndroid Build Coastguard Worker /*deduplicate_derivations=*/false));
78*993b0882SAndroid Build Coastguard Worker
79*993b0882SAndroid Build Coastguard Worker std::vector<EvaluatedDerivation> valid_evaluated_derivations;
80*993b0882SAndroid Build Coastguard Worker for (const EvaluatedDerivation& evaluated_derivation :
81*993b0882SAndroid Build Coastguard Worker evaluated_derivations) {
82*993b0882SAndroid Build Coastguard Worker if (evaluated_derivation.value) {
83*993b0882SAndroid Build Coastguard Worker if (evaluated_derivation.value->Has<flatbuffers::Table>()) {
84*993b0882SAndroid Build Coastguard Worker const UngroundedDatetime* ungrounded_datetime =
85*993b0882SAndroid Build Coastguard Worker evaluated_derivation.value->Table<UngroundedDatetime>();
86*993b0882SAndroid Build Coastguard Worker if (datetime_grounder_.IsValidUngroundedDatetime(ungrounded_datetime)) {
87*993b0882SAndroid Build Coastguard Worker valid_evaluated_derivations.emplace_back(evaluated_derivation);
88*993b0882SAndroid Build Coastguard Worker }
89*993b0882SAndroid Build Coastguard Worker }
90*993b0882SAndroid Build Coastguard Worker }
91*993b0882SAndroid Build Coastguard Worker }
92*993b0882SAndroid Build Coastguard Worker valid_evaluated_derivations =
93*993b0882SAndroid Build Coastguard Worker grammar::DeduplicateDerivations(valid_evaluated_derivations);
94*993b0882SAndroid Build Coastguard Worker for (const EvaluatedDerivation& evaluated_derivation :
95*993b0882SAndroid Build Coastguard Worker valid_evaluated_derivations) {
96*993b0882SAndroid Build Coastguard Worker if (evaluated_derivation.value) {
97*993b0882SAndroid Build Coastguard Worker if (evaluated_derivation.value->Has<flatbuffers::Table>()) {
98*993b0882SAndroid Build Coastguard Worker const UngroundedDatetime* ungrounded_datetime =
99*993b0882SAndroid Build Coastguard Worker evaluated_derivation.value->Table<UngroundedDatetime>();
100*993b0882SAndroid Build Coastguard Worker if ((ungrounded_datetime->annotation_usecases() &
101*993b0882SAndroid Build Coastguard Worker (1 << annotation_usecase)) == 0) {
102*993b0882SAndroid Build Coastguard Worker continue;
103*993b0882SAndroid Build Coastguard Worker }
104*993b0882SAndroid Build Coastguard Worker const StatusOr<std::vector<DatetimeParseResult>>&
105*993b0882SAndroid Build Coastguard Worker datetime_parse_results = datetime_grounder_.Ground(
106*993b0882SAndroid Build Coastguard Worker reference_time_ms_utc, reference_timezone,
107*993b0882SAndroid Build Coastguard Worker locale_list.GetReferenceLocale(), ungrounded_datetime);
108*993b0882SAndroid Build Coastguard Worker TC3_ASSIGN_OR_RETURN(
109*993b0882SAndroid Build Coastguard Worker const std::vector<DatetimeParseResult>& parse_datetime,
110*993b0882SAndroid Build Coastguard Worker datetime_parse_results);
111*993b0882SAndroid Build Coastguard Worker DatetimeParseResultSpan datetime_parse_result_span;
112*993b0882SAndroid Build Coastguard Worker datetime_parse_result_span.target_classification_score =
113*993b0882SAndroid Build Coastguard Worker target_classification_score_;
114*993b0882SAndroid Build Coastguard Worker datetime_parse_result_span.priority_score = priority_score_;
115*993b0882SAndroid Build Coastguard Worker datetime_parse_result_span.data.reserve(parse_datetime.size());
116*993b0882SAndroid Build Coastguard Worker datetime_parse_result_span.data.insert(
117*993b0882SAndroid Build Coastguard Worker datetime_parse_result_span.data.end(), parse_datetime.begin(),
118*993b0882SAndroid Build Coastguard Worker parse_datetime.end());
119*993b0882SAndroid Build Coastguard Worker datetime_parse_result_span.span =
120*993b0882SAndroid Build Coastguard Worker evaluated_derivation.parse_tree->codepoint_span;
121*993b0882SAndroid Build Coastguard Worker
122*993b0882SAndroid Build Coastguard Worker results.emplace_back(datetime_parse_result_span);
123*993b0882SAndroid Build Coastguard Worker }
124*993b0882SAndroid Build Coastguard Worker }
125*993b0882SAndroid Build Coastguard Worker }
126*993b0882SAndroid Build Coastguard Worker return results;
127*993b0882SAndroid Build Coastguard Worker }
128*993b0882SAndroid Build Coastguard Worker } // namespace libtextclassifier3
129