1 /*
2 * Copyright (C) 2018 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "lang_id/common/fel/fel-parser.h"
18
19 #include <ctype.h>
20
21 #include <string>
22
23 #include "lang_id/common/lite_base/logging.h"
24 #include "lang_id/common/lite_strings/numbers.h"
25 #include "absl/strings/string_view.h"
26
27 namespace libtextclassifier3 {
28 namespace mobile {
29
30 namespace {
// Returns true iff character c can appear at the beginning of an identifier:
// a letter (per isalpha), an underscore, or a forward slash.
inline bool IsValidCharAtStartOfIdentifier(char c) {
  // The <ctype.h> classifiers are only defined for values representable as
  // unsigned char (or EOF).  A plain char may be negative for bytes >= 0x80,
  // so convert before classifying.
  const unsigned char uc = static_cast<unsigned char>(c);
  return (isalpha(uc) != 0) || (c == '_') || (c == '/');
}
34
// Returns true iff character c can appear inside an identifier: a letter or
// digit (per isalnum), an underscore, a hyphen, or a forward slash.
inline bool IsValidCharInsideIdentifier(char c) {
  // The <ctype.h> classifiers are only defined for values representable as
  // unsigned char (or EOF).  A plain char may be negative for bytes >= 0x80,
  // so convert before classifying.
  const unsigned char uc = static_cast<unsigned char>(c);
  return (isalnum(uc) != 0) || (c == '_') || (c == '-') || (c == '/');
}
39
// Returns true iff character c can appear at the beginning of a number: a
// decimal digit or a leading '+' / '-' sign.
inline bool IsValidCharAtStartOfNumber(char c) {
  // The <ctype.h> classifiers are only defined for values representable as
  // unsigned char (or EOF).  A plain char may be negative for bytes >= 0x80,
  // so convert before classifying.
  const unsigned char uc = static_cast<unsigned char>(c);
  return (isdigit(uc) != 0) || (c == '+') || (c == '-');
}
44
// Returns true iff character c can appear inside a number: a decimal digit or
// a '.'.
inline bool IsValidCharInsideNumber(char c) {
  // The <ctype.h> classifiers are only defined for values representable as
  // unsigned char (or EOF).  A plain char may be negative for bytes >= 0x80,
  // so convert before classifying.
  const unsigned char uc = static_cast<unsigned char>(c);
  return (isdigit(uc) != 0) || (c == '.');
}
49 } // namespace
50
Initialize(absl::string_view source)51 bool FELParser::Initialize(absl::string_view source) {
52 // Initialize parser state.
53 source_ = std::string(source);
54 current_ = source_.begin();
55 item_start_ = line_start_ = current_;
56 line_number_ = item_line_number_ = 1;
57
58 // Read first input item.
59 return NextItem();
60 }
61
ReportError(const std::string & error_message)62 void FELParser::ReportError(const std::string &error_message) {
63 const int position = item_start_ - line_start_ + 1;
64 const std::string line(line_start_, current_);
65
66 SAFTM_LOG(ERROR) << "Error in feature model, line " << item_line_number_
67 << ", position " << position << ": " << error_message
68 << "\n " << line << " <--HERE";
69 }
70
Next()71 void FELParser::Next() {
72 // Move to the next input character. If we are at a line break update line
73 // number and line start position.
74 if (CurrentChar() == '\n') {
75 ++line_number_;
76 ++current_;
77 line_start_ = current_;
78 } else {
79 ++current_;
80 }
81 }
82
NextItem()83 bool FELParser::NextItem() {
84 // Skip white space and comments.
85 while (!eos()) {
86 if (CurrentChar() == '#') {
87 // Skip comment.
88 while (!eos() && CurrentChar() != '\n') Next();
89 } else if (isspace(CurrentChar())) {
90 // Skip whitespace.
91 while (!eos() && isspace(CurrentChar())) Next();
92 } else {
93 break;
94 }
95 }
96
97 // Record start position for next item.
98 item_start_ = current_;
99 item_line_number_ = line_number_;
100
101 // Check for end of input.
102 if (eos()) {
103 item_type_ = END;
104 return true;
105 }
106
107 // Parse number.
108 if (IsValidCharAtStartOfNumber(CurrentChar())) {
109 std::string::iterator start = current_;
110 Next();
111 while (!eos() && IsValidCharInsideNumber(CurrentChar())) Next();
112 item_text_.assign(start, current_);
113 item_type_ = NUMBER;
114 return true;
115 }
116
117 // Parse string.
118 if (CurrentChar() == '"') {
119 Next();
120 std::string::iterator start = current_;
121 while (CurrentChar() != '"') {
122 if (eos()) {
123 ReportError("Unterminated string");
124 return false;
125 }
126 Next();
127 }
128 item_text_.assign(start, current_);
129 item_type_ = STRING;
130 Next();
131 return true;
132 }
133
134 // Parse identifier name.
135 if (IsValidCharAtStartOfIdentifier(CurrentChar())) {
136 std::string::iterator start = current_;
137 while (!eos() && IsValidCharInsideIdentifier(CurrentChar())) {
138 Next();
139 }
140 item_text_.assign(start, current_);
141 item_type_ = NAME;
142 return true;
143 }
144
145 // Single character item.
146 item_type_ = CurrentChar();
147 Next();
148 return true;
149 }
150
Parse(const std::string & source,FeatureExtractorDescriptor * result)151 bool FELParser::Parse(const std::string &source,
152 FeatureExtractorDescriptor *result) {
153 // Initialize parser.
154 if (!Initialize(source)) {
155 return false;
156 }
157
158 while (item_type_ != END) {
159 // Current item should be a feature name.
160 if (item_type_ != NAME) {
161 ReportError("Feature type name expected");
162 return false;
163 }
164 std::string name = item_text_;
165 if (!NextItem()) {
166 return false;
167 }
168
169 if (item_type_ == '=') {
170 ReportError("Invalid syntax: feature expected");
171 return false;
172 } else {
173 // Parse feature.
174 FeatureFunctionDescriptor *descriptor = result->add_feature();
175 descriptor->set_type(name);
176 if (!ParseFeature(descriptor)) {
177 return false;
178 }
179 }
180 }
181
182 return true;
183 }
184
ParseFeature(FeatureFunctionDescriptor * result)185 bool FELParser::ParseFeature(FeatureFunctionDescriptor *result) {
186 // Parse argument and parameters.
187 if (item_type_ == '(') {
188 if (!NextItem()) return false;
189 if (!ParseParameter(result)) return false;
190 while (item_type_ == ',') {
191 if (!NextItem()) return false;
192 if (!ParseParameter(result)) return false;
193 }
194
195 if (item_type_ != ')') {
196 ReportError(") expected");
197 return false;
198 }
199 if (!NextItem()) return false;
200 }
201
202 // Parse feature name.
203 if (item_type_ == ':') {
204 if (!NextItem()) return false;
205 if (item_type_ != NAME && item_type_ != STRING) {
206 ReportError("Feature name expected");
207 return false;
208 }
209 std::string name = item_text_;
210 if (!NextItem()) return false;
211
212 // Set feature name.
213 result->set_name(name);
214 }
215
216 // Parse sub-features.
217 if (item_type_ == '.') {
218 // Parse dotted sub-feature.
219 if (!NextItem()) return false;
220 if (item_type_ != NAME) {
221 ReportError("Feature type name expected");
222 return false;
223 }
224 std::string type = item_text_;
225 if (!NextItem()) return false;
226
227 // Parse sub-feature.
228 FeatureFunctionDescriptor *subfeature = result->add_feature();
229 subfeature->set_type(type);
230 if (!ParseFeature(subfeature)) return false;
231 } else if (item_type_ == '{') {
232 // Parse sub-feature block.
233 if (!NextItem()) return false;
234 while (item_type_ != '}') {
235 if (item_type_ != NAME) {
236 ReportError("Feature type name expected");
237 return false;
238 }
239 std::string type = item_text_;
240 if (!NextItem()) return false;
241
242 // Parse sub-feature.
243 FeatureFunctionDescriptor *subfeature = result->add_feature();
244 subfeature->set_type(type);
245 if (!ParseFeature(subfeature)) return false;
246 }
247 if (!NextItem()) return false;
248 }
249 return true;
250 }
251
ParseParameter(FeatureFunctionDescriptor * result)252 bool FELParser::ParseParameter(FeatureFunctionDescriptor *result) {
253 if (item_type_ == NUMBER) {
254 int argument;
255 if (!LiteAtoi(item_text_, &argument)) {
256 ReportError("Unable to parse number");
257 return false;
258 }
259 if (!NextItem()) return false;
260
261 // Set default argument for feature.
262 result->set_argument(argument);
263 } else if (item_type_ == NAME) {
264 std::string name = item_text_;
265 if (!NextItem()) return false;
266 if (item_type_ != '=') {
267 ReportError("= expected");
268 return false;
269 }
270 if (!NextItem()) return false;
271 if (item_type_ >= END) {
272 ReportError("Parameter value expected");
273 return false;
274 }
275 std::string value = item_text_;
276 if (!NextItem()) return false;
277
278 // Add parameter to feature.
279 Parameter *parameter;
280 parameter = result->add_parameter();
281 parameter->set_name(name);
282 parameter->set_value(value);
283 } else {
284 ReportError("Syntax error in parameter list");
285 return false;
286 }
287 return true;
288 }
289
290 } // namespace mobile
291 } // namespace libtextclassifier3
292