xref: /aosp_15_r20/external/libtextclassifier/native/lang_id/common/fel/fel-parser.cc (revision 993b0882672172b81d12fad7a7ac0c3e5c824a12)
1 /*
2  * Copyright (C) 2018 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "lang_id/common/fel/fel-parser.h"
18 
19 #include <ctype.h>
20 
21 #include <string>
22 
23 #include "lang_id/common/lite_base/logging.h"
24 #include "lang_id/common/lite_strings/numbers.h"
25 #include "absl/strings/string_view.h"
26 
27 namespace libtextclassifier3 {
28 namespace mobile {
29 
30 namespace {
// Returns true iff character c can appear at the beginning of an identifier:
// an alphabetic character, '_' or '/'.
inline bool IsValidCharAtStartOfIdentifier(char c) {
  // The <ctype.h> classifiers require an argument representable as unsigned
  // char (or EOF); passing a negative plain char is undefined behavior.
  const unsigned char uc = static_cast<unsigned char>(c);
  return (isalpha(uc) != 0) || (c == '_') || (c == '/');
}
34 
// Returns true iff character c can appear inside an identifier: an
// alphanumeric character, '_', '-' or '/'.
inline bool IsValidCharInsideIdentifier(char c) {
  // The <ctype.h> classifiers require an argument representable as unsigned
  // char (or EOF); passing a negative plain char is undefined behavior.
  const unsigned char uc = static_cast<unsigned char>(c);
  return (isalnum(uc) != 0) || (c == '_') || (c == '-') || (c == '/');
}
39 
// Returns true iff character c can appear at the beginning of a number: a
// decimal digit or a sign ('+' / '-').
inline bool IsValidCharAtStartOfNumber(char c) {
  // The <ctype.h> classifiers require an argument representable as unsigned
  // char (or EOF); passing a negative plain char is undefined behavior.
  const unsigned char uc = static_cast<unsigned char>(c);
  return (isdigit(uc) != 0) || (c == '+') || (c == '-');
}
44 
// Returns true iff character c can appear inside a number: a decimal digit or
// '.'.
inline bool IsValidCharInsideNumber(char c) {
  // The <ctype.h> classifiers require an argument representable as unsigned
  // char (or EOF); passing a negative plain char is undefined behavior.
  const unsigned char uc = static_cast<unsigned char>(c);
  return (isdigit(uc) != 0) || (c == '.');
}
49 }  // namespace
50 
Initialize(absl::string_view source)51 bool FELParser::Initialize(absl::string_view source) {
52   // Initialize parser state.
53   source_ = std::string(source);
54   current_ = source_.begin();
55   item_start_ = line_start_ = current_;
56   line_number_ = item_line_number_ = 1;
57 
58   // Read first input item.
59   return NextItem();
60 }
61 
ReportError(const std::string & error_message)62 void FELParser::ReportError(const std::string &error_message) {
63   const int position = item_start_ - line_start_ + 1;
64   const std::string line(line_start_, current_);
65 
66   SAFTM_LOG(ERROR) << "Error in feature model, line " << item_line_number_
67                    << ", position " << position << ": " << error_message
68                    << "\n    " << line << " <--HERE";
69 }
70 
Next()71 void FELParser::Next() {
72   // Move to the next input character. If we are at a line break update line
73   // number and line start position.
74   if (CurrentChar() == '\n') {
75     ++line_number_;
76     ++current_;
77     line_start_ = current_;
78   } else {
79     ++current_;
80   }
81 }
82 
NextItem()83 bool FELParser::NextItem() {
84   // Skip white space and comments.
85   while (!eos()) {
86     if (CurrentChar() == '#') {
87       // Skip comment.
88       while (!eos() && CurrentChar() != '\n') Next();
89     } else if (isspace(CurrentChar())) {
90       // Skip whitespace.
91       while (!eos() && isspace(CurrentChar())) Next();
92     } else {
93       break;
94     }
95   }
96 
97   // Record start position for next item.
98   item_start_ = current_;
99   item_line_number_ = line_number_;
100 
101   // Check for end of input.
102   if (eos()) {
103     item_type_ = END;
104     return true;
105   }
106 
107   // Parse number.
108   if (IsValidCharAtStartOfNumber(CurrentChar())) {
109     std::string::iterator start = current_;
110     Next();
111     while (!eos() && IsValidCharInsideNumber(CurrentChar())) Next();
112     item_text_.assign(start, current_);
113     item_type_ = NUMBER;
114     return true;
115   }
116 
117   // Parse string.
118   if (CurrentChar() == '"') {
119     Next();
120     std::string::iterator start = current_;
121     while (CurrentChar() != '"') {
122       if (eos()) {
123         ReportError("Unterminated string");
124         return false;
125       }
126       Next();
127     }
128     item_text_.assign(start, current_);
129     item_type_ = STRING;
130     Next();
131     return true;
132   }
133 
134   // Parse identifier name.
135   if (IsValidCharAtStartOfIdentifier(CurrentChar())) {
136     std::string::iterator start = current_;
137     while (!eos() && IsValidCharInsideIdentifier(CurrentChar())) {
138       Next();
139     }
140     item_text_.assign(start, current_);
141     item_type_ = NAME;
142     return true;
143   }
144 
145   // Single character item.
146   item_type_ = CurrentChar();
147   Next();
148   return true;
149 }
150 
Parse(const std::string & source,FeatureExtractorDescriptor * result)151 bool FELParser::Parse(const std::string &source,
152                       FeatureExtractorDescriptor *result) {
153   // Initialize parser.
154   if (!Initialize(source)) {
155     return false;
156   }
157 
158   while (item_type_ != END) {
159     // Current item should be a feature name.
160     if (item_type_ != NAME) {
161       ReportError("Feature type name expected");
162       return false;
163     }
164     std::string name = item_text_;
165     if (!NextItem()) {
166       return false;
167     }
168 
169     if (item_type_ == '=') {
170       ReportError("Invalid syntax: feature expected");
171       return false;
172     } else {
173       // Parse feature.
174       FeatureFunctionDescriptor *descriptor = result->add_feature();
175       descriptor->set_type(name);
176       if (!ParseFeature(descriptor)) {
177         return false;
178       }
179     }
180   }
181 
182   return true;
183 }
184 
ParseFeature(FeatureFunctionDescriptor * result)185 bool FELParser::ParseFeature(FeatureFunctionDescriptor *result) {
186   // Parse argument and parameters.
187   if (item_type_ == '(') {
188     if (!NextItem()) return false;
189     if (!ParseParameter(result)) return false;
190     while (item_type_ == ',') {
191       if (!NextItem()) return false;
192       if (!ParseParameter(result)) return false;
193     }
194 
195     if (item_type_ != ')') {
196       ReportError(") expected");
197       return false;
198     }
199     if (!NextItem()) return false;
200   }
201 
202   // Parse feature name.
203   if (item_type_ == ':') {
204     if (!NextItem()) return false;
205     if (item_type_ != NAME && item_type_ != STRING) {
206       ReportError("Feature name expected");
207       return false;
208     }
209     std::string name = item_text_;
210     if (!NextItem()) return false;
211 
212     // Set feature name.
213     result->set_name(name);
214   }
215 
216   // Parse sub-features.
217   if (item_type_ == '.') {
218     // Parse dotted sub-feature.
219     if (!NextItem()) return false;
220     if (item_type_ != NAME) {
221       ReportError("Feature type name expected");
222       return false;
223     }
224     std::string type = item_text_;
225     if (!NextItem()) return false;
226 
227     // Parse sub-feature.
228     FeatureFunctionDescriptor *subfeature = result->add_feature();
229     subfeature->set_type(type);
230     if (!ParseFeature(subfeature)) return false;
231   } else if (item_type_ == '{') {
232     // Parse sub-feature block.
233     if (!NextItem()) return false;
234     while (item_type_ != '}') {
235       if (item_type_ != NAME) {
236         ReportError("Feature type name expected");
237         return false;
238       }
239       std::string type = item_text_;
240       if (!NextItem()) return false;
241 
242       // Parse sub-feature.
243       FeatureFunctionDescriptor *subfeature = result->add_feature();
244       subfeature->set_type(type);
245       if (!ParseFeature(subfeature)) return false;
246     }
247     if (!NextItem()) return false;
248   }
249   return true;
250 }
251 
ParseParameter(FeatureFunctionDescriptor * result)252 bool FELParser::ParseParameter(FeatureFunctionDescriptor *result) {
253   if (item_type_ == NUMBER) {
254     int argument;
255     if (!LiteAtoi(item_text_, &argument)) {
256       ReportError("Unable to parse number");
257       return false;
258     }
259     if (!NextItem()) return false;
260 
261     // Set default argument for feature.
262     result->set_argument(argument);
263   } else if (item_type_ == NAME) {
264     std::string name = item_text_;
265     if (!NextItem()) return false;
266     if (item_type_ != '=') {
267       ReportError("= expected");
268       return false;
269     }
270     if (!NextItem()) return false;
271     if (item_type_ >= END) {
272       ReportError("Parameter value expected");
273       return false;
274     }
275     std::string value = item_text_;
276     if (!NextItem()) return false;
277 
278     // Add parameter to feature.
279     Parameter *parameter;
280     parameter = result->add_parameter();
281     parameter->set_name(name);
282     parameter->set_value(value);
283   } else {
284     ReportError("Syntax error in parameter list");
285     return false;
286   }
287   return true;
288 }
289 
290 }  // namespace mobile
291 }  // namespace libtextclassifier3
292