/*
 * Copyright (C) 2018 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// Generic feature extractor for extracting features from objects.  The feature
// extractor can be used for extracting features from any object.  The feature
// extractor and feature function classes are template classes that have to
// be instantiated for extracting features from a specific object type.
//
// A feature extractor consists of a hierarchy of feature functions.  Each
// feature function extracts one or more feature type and value pairs from the
// object.
//
// The feature extractor has a modular design where new feature functions can be
// registered as components.  The feature extractor is initialized from a
// descriptor represented by a protocol buffer.  The feature extractor can also
// be initialized from a text-based source specification of the feature
// extractor.  Feature specification parsers can be added as components.  By
// default the feature extractor can be read from an ASCII protocol buffer or in
// a simple feature modeling language (fml).

// A feature function is invoked with a focus.  Nested feature functions can be
// invoked with another focus determined by the parent feature function.
36 37 #ifndef NLP_SAFT_COMPONENTS_COMMON_MOBILE_FEL_FEATURE_EXTRACTOR_H_ 38 #define NLP_SAFT_COMPONENTS_COMMON_MOBILE_FEL_FEATURE_EXTRACTOR_H_ 39 40 #include <stddef.h> 41 42 #include <string> 43 #include <vector> 44 45 #include "lang_id/common/fel/feature-descriptors.h" 46 #include "lang_id/common/fel/feature-types.h" 47 #include "lang_id/common/fel/task-context.h" 48 #include "lang_id/common/fel/workspace.h" 49 #include "lang_id/common/lite_base/attributes.h" 50 #include "lang_id/common/lite_base/integral-types.h" 51 #include "lang_id/common/lite_base/logging.h" 52 #include "lang_id/common/lite_base/macros.h" 53 #include "lang_id/common/registry.h" 54 #include "lang_id/common/stl-util.h" 55 #include "absl/strings/string_view.h" 56 57 namespace libtextclassifier3 { 58 namespace mobile { 59 60 // TODO(djweiss) Clean this up as well. 61 // Use the same type for feature values as is used for predicated. 62 typedef int64 Predicate; 63 typedef Predicate FeatureValue; 64 65 // A union used to represent discrete and continuous feature values. 66 union FloatFeatureValue { 67 public: FloatFeatureValue(FeatureValue v)68 explicit FloatFeatureValue(FeatureValue v) : discrete_value(v) {} FloatFeatureValue(uint32 i,float w)69 FloatFeatureValue(uint32 i, float w) : id(i), weight(w) {} 70 FeatureValue discrete_value; 71 struct { 72 uint32 id; 73 float weight; 74 }; 75 }; 76 77 // A feature vector contains feature type and value pairs. 78 class FeatureVector { 79 public: FeatureVector()80 FeatureVector() {} 81 82 // Adds feature type and value pair to feature vector. add(FeatureType * type,FeatureValue value)83 void add(FeatureType *type, FeatureValue value) { 84 features_.emplace_back(type, value); 85 } 86 87 // Removes all elements from the feature vector. clear()88 void clear() { features_.clear(); } 89 90 // Returns the number of elements in the feature vector. size()91 int size() const { return features_.size(); } 92 93 // Reserves space in the underlying feature vector. 
reserve(int n)94 void reserve(int n) { features_.reserve(n); } 95 96 // Returns feature type for an element in the feature vector. type(int index)97 FeatureType *type(int index) const { return features_[index].type; } 98 99 // Returns feature value for an element in the feature vector. value(int index)100 FeatureValue value(int index) const { return features_[index].value; } 101 102 private: 103 // Structure for holding feature type and value pairs. 104 struct Element { ElementElement105 Element() : type(nullptr), value(-1) {} ElementElement106 Element(FeatureType *t, FeatureValue v) : type(t), value(v) {} 107 108 FeatureType *type; 109 FeatureValue value; 110 }; 111 112 // Array for storing feature vector elements. 113 std::vector<Element> features_; 114 115 SAFTM_DISALLOW_COPY_AND_ASSIGN(FeatureVector); 116 }; 117 118 // The generic feature extractor is the type-independent part of a feature 119 // extractor. This holds the descriptor for the feature extractor and the 120 // collection of feature types used in the feature extractor. The feature 121 // types are not available until FeatureExtractor<>::Init() has been called. 122 class GenericFeatureExtractor { 123 public: 124 GenericFeatureExtractor(); 125 virtual ~GenericFeatureExtractor(); 126 127 // Initializes the feature extractor from the FEL specification |source|. 128 // 129 // Returns true on success, false otherwise (e.g., FEL syntax error). 130 SAFTM_MUST_USE_RESULT bool Parse(const std::string &source); 131 132 // Returns the feature extractor descriptor. descriptor()133 const FeatureExtractorDescriptor &descriptor() const { return descriptor_; } mutable_descriptor()134 FeatureExtractorDescriptor *mutable_descriptor() { return &descriptor_; } 135 136 // Returns the number of feature types in the feature extractor. Invalid 137 // before Init() has been called. 
feature_types()138 int feature_types() const { return feature_types_.size(); } 139 140 protected: 141 // Initializes the feature types used by the extractor. Called from 142 // FeatureExtractor<>::Init(). 143 // 144 // Returns true on success, false otherwise. 145 SAFTM_MUST_USE_RESULT bool InitializeFeatureTypes(); 146 147 private: 148 // Initializes the top-level feature functions. 149 // 150 // Returns true on success, false otherwise. 151 SAFTM_MUST_USE_RESULT virtual bool InitializeFeatureFunctions() = 0; 152 153 // Returns all feature types used by the extractor. The feature types are 154 // added to the result array. 155 virtual void GetFeatureTypes(std::vector<FeatureType *> *types) const = 0; 156 157 // Descriptor for the feature extractor. This is a protocol buffer that 158 // contains all the information about the feature extractor. The feature 159 // functions are initialized from the information in the descriptor. 160 FeatureExtractorDescriptor descriptor_; 161 162 // All feature types used by the feature extractor. The collection of all the 163 // feature types describes the feature space of the feature set produced by 164 // the feature extractor. Not owned. 165 std::vector<FeatureType *> feature_types_; 166 }; 167 168 // The generic feature function is the type-independent part of a feature 169 // function. Each feature function is associated with the descriptor that it is 170 // instantiated from. The feature types associated with this feature function 171 // will be established by the time FeatureExtractor<>::Init() completes. 172 class GenericFeatureFunction { 173 public: 174 // A feature value that represents the absence of a value. 175 static constexpr FeatureValue kNone = -1; 176 177 GenericFeatureFunction(); 178 virtual ~GenericFeatureFunction(); 179 180 // Sets up the feature function. NB: FeatureTypes of nested functions are not 181 // guaranteed to be available until Init(). 182 // 183 // Returns true on success, false otherwise. 
Setup(TaskContext * context)184 SAFTM_MUST_USE_RESULT virtual bool Setup(TaskContext *context) { 185 return true; 186 } 187 188 // Initializes the feature function. NB: The FeatureType of this function must 189 // be established when this method completes. 190 // 191 // Returns true on success, false otherwise. Init(TaskContext * context)192 SAFTM_MUST_USE_RESULT virtual bool Init(TaskContext *context) { return true; } 193 194 // Requests workspaces from a registry to obtain indices into a WorkspaceSet 195 // for any Workspace objects used by this feature function. NB: This will be 196 // called after Init(), so it can depend on resources and arguments. RequestWorkspaces(WorkspaceRegistry * registry)197 virtual void RequestWorkspaces(WorkspaceRegistry *registry) {} 198 199 // Appends the feature types produced by the feature function to types. The 200 // default implementation appends feature_type(), if non-null. Invalid 201 // before Init() has been called. 202 virtual void GetFeatureTypes(std::vector<FeatureType *> *types) const; 203 204 // Returns the feature type for feature produced by this feature function. If 205 // the feature function produces features of different types this returns 206 // null. Invalid before Init() has been called. 207 virtual FeatureType *GetFeatureType() const; 208 209 // Returns value of parameter |name| from the feature function descriptor. 210 // If the parameter is not present, returns the indicated |default_value|. 211 std::string GetParameter(const std::string &name, 212 const std::string &default_value) const; 213 214 // Returns value of int parameter |name| from feature function descriptor. 215 // If the parameter is not present, or its value can't be parsed as an int, 216 // returns |default_value|. 217 int GetIntParameter(const std::string &name, int default_value) const; 218 219 // Returns value of bool parameter |name| from feature function descriptor. 
220 // If the parameter is not present, or its value is not "true" or "false", 221 // returns |default_value|. NOTE: this method is case sensitive, it doesn't 222 // do any lower-casing. 223 bool GetBoolParameter(const std::string &name, bool default_value) const; 224 225 // Returns the FEL function description for the feature function, i.e. the 226 // name and parameters without the nested features. FunctionName()227 std::string FunctionName() const { 228 std::string output; 229 ToFELFunction(*descriptor_, &output); 230 return output; 231 } 232 233 // Returns the prefix for nested feature functions. This is the prefix of this 234 // feature function concatenated with the feature function name. SubPrefix()235 std::string SubPrefix() const { 236 return prefix_.empty() ? FunctionName() : prefix_ + "." + FunctionName(); 237 } 238 239 // Returns/sets the feature extractor this function belongs to. extractor()240 const GenericFeatureExtractor *extractor() const { return extractor_; } set_extractor(const GenericFeatureExtractor * extractor)241 void set_extractor(const GenericFeatureExtractor *extractor) { 242 extractor_ = extractor; 243 } 244 245 // Returns/sets the feature function descriptor. descriptor()246 const FeatureFunctionDescriptor *descriptor() const { return descriptor_; } set_descriptor(const FeatureFunctionDescriptor * descriptor)247 void set_descriptor(const FeatureFunctionDescriptor *descriptor) { 248 descriptor_ = descriptor; 249 } 250 251 // Returns a descriptive name for the feature function. The name is taken from 252 // the descriptor for the feature function. If the name is empty or the 253 // feature function is a variable the name is the FEL representation of the 254 // feature, including the prefix. 255 std::string name() const; 256 257 // Returns the argument from the feature function descriptor. It defaults to 258 // 0 if the argument has not been specified. argument()259 int argument() const { 260 return descriptor_->has_argument() ? 
descriptor_->argument() : 0; 261 } 262 263 // Returns/sets/clears function name prefix. prefix()264 const std::string &prefix() const { return prefix_; } set_prefix(absl::string_view prefix)265 void set_prefix(absl::string_view prefix) { prefix_ = std::string(prefix); } 266 267 protected: 268 // Returns the feature type for single-type feature functions. feature_type()269 FeatureType *feature_type() const { return feature_type_; } 270 271 // Sets the feature type for single-type feature functions. This takes 272 // ownership of feature_type. Can only be called once. set_feature_type(FeatureType * feature_type)273 void set_feature_type(FeatureType *feature_type) { 274 SAFTM_CHECK_EQ(feature_type_, nullptr); 275 feature_type_ = feature_type; 276 } 277 278 private: 279 // Feature extractor this feature function belongs to. Not owned. Set to a 280 // pointer != nullptr as soon as this object is created by Instantiate(). 281 // Normal methods can safely assume this is != nullptr. 282 const GenericFeatureExtractor *extractor_ = nullptr; 283 284 // Descriptor for feature function. Not owned. Set to a pointer != nullptr 285 // as soon as this object is created by Instantiate(). Normal methods can 286 // safely assume this is != nullptr. 287 const FeatureFunctionDescriptor *descriptor_ = nullptr; 288 289 // Feature type for features produced by this feature function. If the 290 // feature function produces features of multiple feature types this is null 291 // and the feature function must return it's feature types in 292 // GetFeatureTypes(). Owned. 293 FeatureType *feature_type_ = nullptr; 294 295 // Prefix used for sub-feature types of this function. 296 std::string prefix_; 297 }; 298 299 // Feature function that can extract features from an object. Templated on 300 // two type arguments: 301 // 302 // OBJ: The "object" from which features are extracted; e.g., a sentence. This 303 // should be a plain type, rather than a reference or pointer. 
304 // 305 // ARGS: A set of 0 or more types that are used to "index" into some part of the 306 // object that should be extracted, e.g. an int token index for a sentence 307 // object. This should not be a reference type. 308 template <class OBJ, class... ARGS> 309 class FeatureFunction 310 : public GenericFeatureFunction, 311 public RegisterableClass<FeatureFunction<OBJ, ARGS...> > { 312 public: 313 using Self = FeatureFunction<OBJ, ARGS...>; 314 315 // Preprocesses the object. This will be called prior to calling Evaluate() 316 // or Compute() on that object. Preprocess(WorkspaceSet * workspaces,const OBJ * object)317 virtual void Preprocess(WorkspaceSet *workspaces, const OBJ *object) const {} 318 319 // Appends features computed from the object and focus to the result. The 320 // default implementation delegates to Compute(), adding a single value if 321 // available. Multi-valued feature functions must override this method. Evaluate(const WorkspaceSet & workspaces,const OBJ & object,ARGS...args,FeatureVector * result)322 virtual void Evaluate(const WorkspaceSet &workspaces, const OBJ &object, 323 ARGS... args, FeatureVector *result) const { 324 FeatureValue value = Compute(workspaces, object, args...); 325 if (value != kNone) result->add(feature_type(), value); 326 } 327 328 // Returns a feature value computed from the object and focus, or kNone if no 329 // value is computed. Single-valued feature functions only need to override 330 // this method. Compute(const WorkspaceSet & workspaces,const OBJ & object,ARGS...args)331 virtual FeatureValue Compute(const WorkspaceSet &workspaces, 332 const OBJ &object, ARGS... args) const { 333 return kNone; 334 } 335 336 // Instantiates a new feature function in a feature extractor from a feature 337 // descriptor. 338 // 339 // Returns a pointer to the newly-created object if everything goes well. 
340 // Returns nullptr if the feature function could not be instantiated (e.g., if 341 // the function with that name is not registered; this usually happens because 342 // the relevant cc_library was not linked-in). Instantiate(const GenericFeatureExtractor * extractor,const FeatureFunctionDescriptor * fd,absl::string_view prefix)343 static Self *Instantiate(const GenericFeatureExtractor *extractor, 344 const FeatureFunctionDescriptor *fd, 345 absl::string_view prefix) { 346 Self *f = Self::Create(fd->type()); 347 if (f != nullptr) { 348 f->set_extractor(extractor); 349 f->set_descriptor(fd); 350 f->set_prefix(prefix); 351 } 352 return f; 353 } 354 355 private: 356 // Special feature function class for resolving variable references. The type 357 // of the feature function is used for resolving the variable reference. When 358 // evaluated it will either get the feature value(s) from the variable portion 359 // of the feature vector, if present, or otherwise it will call the referenced 360 // feature extractor function directly to extract the feature(s). 361 class Reference; 362 }; 363 364 // Base class for features with nested feature functions. The nested functions 365 // are of type NES, which may be different from the type of the parent function. 366 // NB: NestedFeatureFunction will ensure that all initialization of nested 367 // functions takes place during Setup() and Init() -- after the nested features 368 // are initialized, the parent feature is initialized via SetupNested() and 369 // InitNested(). Alternatively, a derived classes that overrides Setup() and 370 // Init() directly should call Parent::Setup(), Parent::Init(), etc. first. 371 // 372 // Note: NestedFeatureFunction cannot know how to call Preprocess, Evaluate, or 373 // Compute, since the nested functions may be of a different type. 374 template <class NES, class OBJ, class... 
ARGS> 375 class NestedFeatureFunction : public FeatureFunction<OBJ, ARGS...> { 376 public: 377 using Parent = NestedFeatureFunction<NES, OBJ, ARGS...>; 378 379 // Clean up nested functions. ~NestedFeatureFunction()380 ~NestedFeatureFunction() override { utils::STLDeleteElements(&nested_); } 381 382 // By default, just appends the nested feature types. GetFeatureTypes(std::vector<FeatureType * > * types)383 void GetFeatureTypes(std::vector<FeatureType *> *types) const override { 384 SAFTM_CHECK(!this->nested().empty()) 385 << "Nested features require nested features to be defined."; 386 for (auto *function : nested_) function->GetFeatureTypes(types); 387 } 388 389 // Sets up the nested features. 390 // 391 // Returns true on success, false otherwise. Setup(TaskContext * context)392 SAFTM_MUST_USE_RESULT bool Setup(TaskContext *context) override { 393 bool success = CreateNested(this->extractor(), this->descriptor(), &nested_, 394 this->SubPrefix()); 395 if (!success) return false; 396 for (auto *function : nested_) { 397 if (!function->Setup(context)) return false; 398 } 399 if (!SetupNested(context)) return false; 400 return true; 401 } 402 403 // Sets up this NestedFeatureFunction specifically. 404 // 405 // Returns true on success, false otherwise. SetupNested(TaskContext * context)406 SAFTM_MUST_USE_RESULT virtual bool SetupNested(TaskContext *context) { 407 return true; 408 } 409 410 // Initializes the nested features. 411 // 412 // Returns true on success, false otherwise. Init(TaskContext * context)413 SAFTM_MUST_USE_RESULT bool Init(TaskContext *context) override { 414 for (auto *function : nested_) { 415 if (!function->Init(context)) return false; 416 } 417 if (!InitNested(context)) return false; 418 return true; 419 } 420 421 // Initializes this NestedFeatureFunction specifically. 422 // 423 // Returns true on success, false otherwise. 
InitNested(TaskContext * context)424 SAFTM_MUST_USE_RESULT virtual bool InitNested(TaskContext *context) { 425 return true; 426 } 427 428 // Gets all the workspaces needed for the nested functions. RequestWorkspaces(WorkspaceRegistry * registry)429 void RequestWorkspaces(WorkspaceRegistry *registry) override { 430 for (auto *function : nested_) function->RequestWorkspaces(registry); 431 } 432 433 // Returns the list of nested feature functions. nested()434 const std::vector<NES *> &nested() const { return nested_; } 435 436 // Instantiates nested feature functions for a feature function. Creates and 437 // initializes one feature function for each sub-descriptor in the feature 438 // descriptor. 439 // 440 // Returns true on success, false otherwise. CreateNested(const GenericFeatureExtractor * extractor,const FeatureFunctionDescriptor * fd,std::vector<NES * > * functions,absl::string_view prefix)441 SAFTM_MUST_USE_RESULT static bool CreateNested( 442 const GenericFeatureExtractor *extractor, 443 const FeatureFunctionDescriptor *fd, std::vector<NES *> *functions, 444 absl::string_view prefix) { 445 for (int i = 0; i < fd->feature_size(); ++i) { 446 const FeatureFunctionDescriptor &sub = fd->feature(i); 447 NES *f = NES::Instantiate(extractor, &sub, prefix); 448 if (f == nullptr) return false; 449 functions->push_back(f); 450 } 451 return true; 452 } 453 454 protected: 455 // The nested feature functions, if any, in order of declaration in the 456 // feature descriptor. Owned. 457 std::vector<NES *> nested_; 458 }; 459 460 // Base class for a nested feature function that takes nested features with the 461 // same signature as these features, i.e. a meta feature. For this class, we can 462 // provide preprocessing of the nested features. 463 template <class OBJ, class... ARGS> 464 class MetaFeatureFunction 465 : public NestedFeatureFunction<FeatureFunction<OBJ, ARGS...>, OBJ, 466 ARGS...> { 467 public: 468 // Preprocesses using the nested features. 
Preprocess(WorkspaceSet * workspaces,const OBJ * object)469 void Preprocess(WorkspaceSet *workspaces, const OBJ *object) const override { 470 for (auto *function : this->nested_) { 471 function->Preprocess(workspaces, object); 472 } 473 } 474 }; 475 476 // Template for a special type of locator: The locator of type 477 // FeatureFunction<OBJ, ARGS...> calls nested functions of type 478 // FeatureFunction<OBJ, IDX, ARGS...>, where the derived class DER is 479 // responsible for translating by providing the following: 480 // 481 // // Gets the new additional focus. 482 // IDX GetFocus(const WorkspaceSet &workspaces, const OBJ &object); 483 // 484 // This is useful to e.g. add a token focus to a parser state based on some 485 // desired property of that state. 486 template <class DER, class OBJ, class IDX, class... ARGS> 487 class FeatureAddFocusLocator 488 : public NestedFeatureFunction<FeatureFunction<OBJ, IDX, ARGS...>, OBJ, 489 ARGS...> { 490 public: Preprocess(WorkspaceSet * workspaces,const OBJ * object)491 void Preprocess(WorkspaceSet *workspaces, const OBJ *object) const override { 492 for (auto *function : this->nested_) { 493 function->Preprocess(workspaces, object); 494 } 495 } 496 Evaluate(const WorkspaceSet & workspaces,const OBJ & object,ARGS...args,FeatureVector * result)497 void Evaluate(const WorkspaceSet &workspaces, const OBJ &object, ARGS... args, 498 FeatureVector *result) const override { 499 IDX focus = 500 static_cast<const DER *>(this)->GetFocus(workspaces, object, args...); 501 for (auto *function : this->nested()) { 502 function->Evaluate(workspaces, object, focus, args..., result); 503 } 504 } 505 506 // Returns the first nested feature's computed value. Compute(const WorkspaceSet & workspaces,const OBJ & object,ARGS...args)507 FeatureValue Compute(const WorkspaceSet &workspaces, const OBJ &object, 508 ARGS... 
args) const override { 509 IDX focus = 510 static_cast<const DER *>(this)->GetFocus(workspaces, object, args...); 511 return this->nested()[0]->Compute(workspaces, object, focus, args...); 512 } 513 }; 514 515 // CRTP feature locator class. This is a meta feature that modifies ARGS and 516 // then calls the nested feature functions with the modified ARGS. Note that in 517 // order for this template to work correctly, all of ARGS must be types for 518 // which the reference operator & can be interpreted as a pointer to the 519 // argument. The derived class DER must implement the UpdateFocus method which 520 // takes pointers to the ARGS arguments: 521 // 522 // // Updates the current arguments. 523 // void UpdateArgs(const OBJ &object, ARGS *...args) const; 524 template <class DER, class OBJ, class... ARGS> 525 class FeatureLocator : public MetaFeatureFunction<OBJ, ARGS...> { 526 public: 527 // Feature locators have an additional check that there is no intrinsic type. GetFeatureTypes(std::vector<FeatureType * > * types)528 void GetFeatureTypes(std::vector<FeatureType *> *types) const override { 529 SAFTM_CHECK_EQ(this->feature_type(), nullptr) 530 << "FeatureLocators should not have an intrinsic type."; 531 MetaFeatureFunction<OBJ, ARGS...>::GetFeatureTypes(types); 532 } 533 534 // Evaluates the locator. Evaluate(const WorkspaceSet & workspaces,const OBJ & object,ARGS...args,FeatureVector * result)535 void Evaluate(const WorkspaceSet &workspaces, const OBJ &object, ARGS... args, 536 FeatureVector *result) const override { 537 static_cast<const DER *>(this)->UpdateArgs(workspaces, object, &args...); 538 for (auto *function : this->nested()) { 539 function->Evaluate(workspaces, object, args..., result); 540 } 541 } 542 543 // Returns the first nested feature's computed value. Compute(const WorkspaceSet & workspaces,const OBJ & object,ARGS...args)544 FeatureValue Compute(const WorkspaceSet &workspaces, const OBJ &object, 545 ARGS... 
args) const override { 546 static_cast<const DER *>(this)->UpdateArgs(workspaces, object, &args...); 547 return this->nested()[0]->Compute(workspaces, object, args...); 548 } 549 }; 550 551 // Feature extractor for extracting features from objects of a certain class. 552 // Template type parameters are as defined for FeatureFunction. 553 template <class OBJ, class... ARGS> 554 class FeatureExtractor : public GenericFeatureExtractor { 555 public: 556 // Feature function type for top-level functions in the feature extractor. 557 typedef FeatureFunction<OBJ, ARGS...> Function; 558 typedef FeatureExtractor<OBJ, ARGS...> Self; 559 560 // Feature locator type for the feature extractor. 561 template <class DER> 562 using Locator = FeatureLocator<DER, OBJ, ARGS...>; 563 564 // Initializes feature extractor. FeatureExtractor()565 FeatureExtractor() {} 566 ~FeatureExtractor()567 ~FeatureExtractor() override { utils::STLDeleteElements(&functions_); } 568 569 // Sets up the feature extractor. Note that only top-level functions exist 570 // until Setup() is called. This does not take ownership over the context, 571 // which must outlive this. 572 // 573 // Returns true on success, false otherwise. Setup(TaskContext * context)574 SAFTM_MUST_USE_RESULT bool Setup(TaskContext *context) { 575 for (Function *function : functions_) { 576 if (!function->Setup(context)) return false; 577 } 578 return true; 579 } 580 581 // Initializes the feature extractor. Must be called after Setup(). This 582 // does not take ownership over the context, which must outlive this. 583 // 584 // Returns true on success, false otherwise. Init(TaskContext * context)585 SAFTM_MUST_USE_RESULT bool Init(TaskContext *context) { 586 for (Function *function : functions_) { 587 if (!function->Init(context)) return false; 588 } 589 if (!this->InitializeFeatureTypes()) return false; 590 return true; 591 } 592 593 // Requests workspaces from the registry. Must be called after Init(), and 594 // before Preprocess(). 
Does not take ownership over registry. This should be 595 // the same registry used to initialize the WorkspaceSet used in Preprocess() 596 // and ExtractFeatures(). NB: This is a different ordering from that used in 597 // SentenceFeatureRepresentation style feature computation. RequestWorkspaces(WorkspaceRegistry * registry)598 void RequestWorkspaces(WorkspaceRegistry *registry) { 599 for (auto *function : functions_) function->RequestWorkspaces(registry); 600 } 601 602 // Preprocesses the object using feature functions for the phase. Must be 603 // called before any calls to ExtractFeatures() on that object and phase. Preprocess(WorkspaceSet * workspaces,const OBJ * object)604 void Preprocess(WorkspaceSet *workspaces, const OBJ *object) const { 605 for (Function *function : functions_) { 606 function->Preprocess(workspaces, object); 607 } 608 } 609 610 // Extracts features from an object with a focus. This invokes all the 611 // top-level feature functions in the feature extractor. Only feature 612 // functions belonging to the specified phase are invoked. ExtractFeatures(const WorkspaceSet & workspaces,const OBJ & object,ARGS...args,FeatureVector * result)613 void ExtractFeatures(const WorkspaceSet &workspaces, const OBJ &object, 614 ARGS... args, FeatureVector *result) const { 615 result->reserve(this->feature_types()); 616 617 // Extract features. 618 for (size_t i = 0; i < functions_.size(); ++i) { 619 functions_[i]->Evaluate(workspaces, object, args..., result); 620 } 621 } 622 623 private: 624 // Creates and initializes all feature functions in the feature extractor. 625 // 626 // Returns true on success, false otherwise. InitializeFeatureFunctions()627 SAFTM_MUST_USE_RESULT bool InitializeFeatureFunctions() override { 628 // Create all top-level feature functions. 
629 for (int i = 0; i < descriptor().feature_size(); ++i) { 630 const FeatureFunctionDescriptor &fd = descriptor().feature(i); 631 Function *function = Function::Instantiate(this, &fd, ""); 632 if (function == nullptr) return false; 633 functions_.push_back(function); 634 } 635 return true; 636 } 637 638 // Collect all feature types used in the feature extractor. GetFeatureTypes(std::vector<FeatureType * > * types)639 void GetFeatureTypes(std::vector<FeatureType *> *types) const override { 640 for (size_t i = 0; i < functions_.size(); ++i) { 641 functions_[i]->GetFeatureTypes(types); 642 } 643 } 644 645 // Top-level feature functions (and variables) in the feature extractor. 646 // Owned. 647 std::vector<Function *> functions_; 648 }; 649 650 } // namespace mobile 651 } // namespace nlp_saft 652 653 #endif // NLP_SAFT_COMPONENTS_COMMON_MOBILE_FEL_FEATURE_EXTRACTOR_H_ 654