xref: /aosp_15_r20/external/libtextclassifier/native/lang_id/common/fel/feature-extractor.h (revision 993b0882672172b81d12fad7a7ac0c3e5c824a12)
1 /*
2  * Copyright (C) 2018 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 // Generic feature extractor for extracting features from objects. The feature
18 // extractor can be used for extracting features from any object. The feature
19 // extractor and feature function classes are template classes that have to
20 // be instantiated for extracting feature from a specific object type.
21 //
22 // A feature extractor consists of a hierarchy of feature functions. Each
23 // feature function extracts one or more feature type and value pairs from the
24 // object.
25 //
26 // The feature extractor has a modular design where new feature functions can be
27 // registered as components. The feature extractor is initialized from a
28 // descriptor represented by a protocol buffer. The feature extractor can also
29 // be initialized from a text-based source specification of the feature
30 // extractor. Feature specification parsers can be added as components. By
31 // default the feature extractor can be read from an ASCII protocol buffer or in
32 // a simple feature modeling language (fml).
33 
34 // A feature function is invoked with a focus. Nested feature function can be
35 // invoked with another focus determined by the parent feature function.
36 
37 #ifndef NLP_SAFT_COMPONENTS_COMMON_MOBILE_FEL_FEATURE_EXTRACTOR_H_
38 #define NLP_SAFT_COMPONENTS_COMMON_MOBILE_FEL_FEATURE_EXTRACTOR_H_
39 
40 #include <stddef.h>
41 
42 #include <string>
43 #include <vector>
44 
45 #include "lang_id/common/fel/feature-descriptors.h"
46 #include "lang_id/common/fel/feature-types.h"
47 #include "lang_id/common/fel/task-context.h"
48 #include "lang_id/common/fel/workspace.h"
49 #include "lang_id/common/lite_base/attributes.h"
50 #include "lang_id/common/lite_base/integral-types.h"
51 #include "lang_id/common/lite_base/logging.h"
52 #include "lang_id/common/lite_base/macros.h"
53 #include "lang_id/common/registry.h"
54 #include "lang_id/common/stl-util.h"
55 #include "absl/strings/string_view.h"
56 
57 namespace libtextclassifier3 {
58 namespace mobile {
59 
60 // TODO(djweiss) Clean this up as well.
// Use the same type for feature values as is used for predicates.
62 typedef int64 Predicate;
63 typedef Predicate FeatureValue;
64 
65 // A union used to represent discrete and continuous feature values.
66 union FloatFeatureValue {
67  public:
FloatFeatureValue(FeatureValue v)68   explicit FloatFeatureValue(FeatureValue v) : discrete_value(v) {}
FloatFeatureValue(uint32 i,float w)69   FloatFeatureValue(uint32 i, float w) : id(i), weight(w) {}
70   FeatureValue discrete_value;
71   struct {
72     uint32 id;
73     float weight;
74   };
75 };
76 
77 // A feature vector contains feature type and value pairs.
78 class FeatureVector {
79  public:
FeatureVector()80   FeatureVector() {}
81 
82   // Adds feature type and value pair to feature vector.
add(FeatureType * type,FeatureValue value)83   void add(FeatureType *type, FeatureValue value) {
84     features_.emplace_back(type, value);
85   }
86 
87   // Removes all elements from the feature vector.
clear()88   void clear() { features_.clear(); }
89 
90   // Returns the number of elements in the feature vector.
size()91   int size() const { return features_.size(); }
92 
93   // Reserves space in the underlying feature vector.
reserve(int n)94   void reserve(int n) { features_.reserve(n); }
95 
96   // Returns feature type for an element in the feature vector.
type(int index)97   FeatureType *type(int index) const { return features_[index].type; }
98 
99   // Returns feature value for an element in the feature vector.
value(int index)100   FeatureValue value(int index) const { return features_[index].value; }
101 
102  private:
103   // Structure for holding feature type and value pairs.
104   struct Element {
ElementElement105     Element() : type(nullptr), value(-1) {}
ElementElement106     Element(FeatureType *t, FeatureValue v) : type(t), value(v) {}
107 
108     FeatureType *type;
109     FeatureValue value;
110   };
111 
112   // Array for storing feature vector elements.
113   std::vector<Element> features_;
114 
115   SAFTM_DISALLOW_COPY_AND_ASSIGN(FeatureVector);
116 };
117 
118 // The generic feature extractor is the type-independent part of a feature
119 // extractor. This holds the descriptor for the feature extractor and the
120 // collection of feature types used in the feature extractor.  The feature
121 // types are not available until FeatureExtractor<>::Init() has been called.
122 class GenericFeatureExtractor {
123  public:
124   GenericFeatureExtractor();
125   virtual ~GenericFeatureExtractor();
126 
127   // Initializes the feature extractor from the FEL specification |source|.
128   //
129   // Returns true on success, false otherwise (e.g., FEL syntax error).
130   SAFTM_MUST_USE_RESULT bool Parse(const std::string &source);
131 
132   // Returns the feature extractor descriptor.
descriptor()133   const FeatureExtractorDescriptor &descriptor() const { return descriptor_; }
mutable_descriptor()134   FeatureExtractorDescriptor *mutable_descriptor() { return &descriptor_; }
135 
136   // Returns the number of feature types in the feature extractor.  Invalid
137   // before Init() has been called.
feature_types()138   int feature_types() const { return feature_types_.size(); }
139 
140  protected:
141   // Initializes the feature types used by the extractor.  Called from
142   // FeatureExtractor<>::Init().
143   //
144   // Returns true on success, false otherwise.
145   SAFTM_MUST_USE_RESULT bool InitializeFeatureTypes();
146 
147  private:
148   // Initializes the top-level feature functions.
149   //
150   // Returns true on success, false otherwise.
151   SAFTM_MUST_USE_RESULT virtual bool InitializeFeatureFunctions() = 0;
152 
153   // Returns all feature types used by the extractor. The feature types are
154   // added to the result array.
155   virtual void GetFeatureTypes(std::vector<FeatureType *> *types) const = 0;
156 
157   // Descriptor for the feature extractor. This is a protocol buffer that
158   // contains all the information about the feature extractor. The feature
159   // functions are initialized from the information in the descriptor.
160   FeatureExtractorDescriptor descriptor_;
161 
162   // All feature types used by the feature extractor. The collection of all the
163   // feature types describes the feature space of the feature set produced by
164   // the feature extractor.  Not owned.
165   std::vector<FeatureType *> feature_types_;
166 };
167 
168 // The generic feature function is the type-independent part of a feature
169 // function. Each feature function is associated with the descriptor that it is
170 // instantiated from.  The feature types associated with this feature function
171 // will be established by the time FeatureExtractor<>::Init() completes.
172 class GenericFeatureFunction {
173  public:
174   // A feature value that represents the absence of a value.
175   static constexpr FeatureValue kNone = -1;
176 
177   GenericFeatureFunction();
178   virtual ~GenericFeatureFunction();
179 
180   // Sets up the feature function. NB: FeatureTypes of nested functions are not
181   // guaranteed to be available until Init().
182   //
183   // Returns true on success, false otherwise.
Setup(TaskContext * context)184   SAFTM_MUST_USE_RESULT virtual bool Setup(TaskContext *context) {
185     return true;
186   }
187 
188   // Initializes the feature function. NB: The FeatureType of this function must
189   // be established when this method completes.
190   //
191   // Returns true on success, false otherwise.
Init(TaskContext * context)192   SAFTM_MUST_USE_RESULT virtual bool Init(TaskContext *context) { return true; }
193 
194   // Requests workspaces from a registry to obtain indices into a WorkspaceSet
195   // for any Workspace objects used by this feature function. NB: This will be
196   // called after Init(), so it can depend on resources and arguments.
RequestWorkspaces(WorkspaceRegistry * registry)197   virtual void RequestWorkspaces(WorkspaceRegistry *registry) {}
198 
199   // Appends the feature types produced by the feature function to types.  The
200   // default implementation appends feature_type(), if non-null.  Invalid
201   // before Init() has been called.
202   virtual void GetFeatureTypes(std::vector<FeatureType *> *types) const;
203 
204   // Returns the feature type for feature produced by this feature function. If
205   // the feature function produces features of different types this returns
206   // null.  Invalid before Init() has been called.
207   virtual FeatureType *GetFeatureType() const;
208 
209   // Returns value of parameter |name| from the feature function descriptor.
210   // If the parameter is not present, returns the indicated |default_value|.
211   std::string GetParameter(const std::string &name,
212                            const std::string &default_value) const;
213 
214   // Returns value of int parameter |name| from feature function descriptor.
215   // If the parameter is not present, or its value can't be parsed as an int,
216   // returns |default_value|.
217   int GetIntParameter(const std::string &name, int default_value) const;
218 
219   // Returns value of bool parameter |name| from feature function descriptor.
220   // If the parameter is not present, or its value is not "true" or "false",
221   // returns |default_value|.  NOTE: this method is case sensitive, it doesn't
222   // do any lower-casing.
223   bool GetBoolParameter(const std::string &name, bool default_value) const;
224 
225   // Returns the FEL function description for the feature function, i.e. the
226   // name and parameters without the nested features.
FunctionName()227   std::string FunctionName() const {
228     std::string output;
229     ToFELFunction(*descriptor_, &output);
230     return output;
231   }
232 
233   // Returns the prefix for nested feature functions. This is the prefix of this
234   // feature function concatenated with the feature function name.
SubPrefix()235   std::string SubPrefix() const {
236     return prefix_.empty() ? FunctionName() : prefix_ + "." + FunctionName();
237   }
238 
239   // Returns/sets the feature extractor this function belongs to.
extractor()240   const GenericFeatureExtractor *extractor() const { return extractor_; }
set_extractor(const GenericFeatureExtractor * extractor)241   void set_extractor(const GenericFeatureExtractor *extractor) {
242     extractor_ = extractor;
243   }
244 
245   // Returns/sets the feature function descriptor.
descriptor()246   const FeatureFunctionDescriptor *descriptor() const { return descriptor_; }
set_descriptor(const FeatureFunctionDescriptor * descriptor)247   void set_descriptor(const FeatureFunctionDescriptor *descriptor) {
248     descriptor_ = descriptor;
249   }
250 
251   // Returns a descriptive name for the feature function. The name is taken from
252   // the descriptor for the feature function. If the name is empty or the
253   // feature function is a variable the name is the FEL representation of the
254   // feature, including the prefix.
255   std::string name() const;
256 
257   // Returns the argument from the feature function descriptor. It defaults to
258   // 0 if the argument has not been specified.
argument()259   int argument() const {
260     return descriptor_->has_argument() ? descriptor_->argument() : 0;
261   }
262 
263   // Returns/sets/clears function name prefix.
prefix()264   const std::string &prefix() const { return prefix_; }
set_prefix(absl::string_view prefix)265   void set_prefix(absl::string_view prefix) { prefix_ = std::string(prefix); }
266 
267  protected:
268   // Returns the feature type for single-type feature functions.
feature_type()269   FeatureType *feature_type() const { return feature_type_; }
270 
271   // Sets the feature type for single-type feature functions.  This takes
272   // ownership of feature_type.  Can only be called once.
set_feature_type(FeatureType * feature_type)273   void set_feature_type(FeatureType *feature_type) {
274     SAFTM_CHECK_EQ(feature_type_, nullptr);
275     feature_type_ = feature_type;
276   }
277 
278  private:
279   // Feature extractor this feature function belongs to.  Not owned.  Set to a
280   // pointer != nullptr as soon as this object is created by Instantiate().
281   // Normal methods can safely assume this is != nullptr.
282   const GenericFeatureExtractor *extractor_ = nullptr;
283 
284   // Descriptor for feature function.  Not owned.  Set to a pointer != nullptr
285   // as soon as this object is created by Instantiate().  Normal methods can
286   // safely assume this is != nullptr.
287   const FeatureFunctionDescriptor *descriptor_ = nullptr;
288 
289   // Feature type for features produced by this feature function. If the
290   // feature function produces features of multiple feature types this is null
291   // and the feature function must return it's feature types in
292   // GetFeatureTypes().  Owned.
293   FeatureType *feature_type_ = nullptr;
294 
295   // Prefix used for sub-feature types of this function.
296   std::string prefix_;
297 };
298 
299 // Feature function that can extract features from an object.  Templated on
300 // two type arguments:
301 //
302 // OBJ:  The "object" from which features are extracted; e.g., a sentence.  This
303 //       should be a plain type, rather than a reference or pointer.
304 //
305 // ARGS: A set of 0 or more types that are used to "index" into some part of the
306 //       object that should be extracted, e.g. an int token index for a sentence
307 //       object.  This should not be a reference type.
308 template <class OBJ, class... ARGS>
309 class FeatureFunction
310     : public GenericFeatureFunction,
311       public RegisterableClass<FeatureFunction<OBJ, ARGS...> > {
312  public:
313   using Self = FeatureFunction<OBJ, ARGS...>;
314 
315   // Preprocesses the object.  This will be called prior to calling Evaluate()
316   // or Compute() on that object.
Preprocess(WorkspaceSet * workspaces,const OBJ * object)317   virtual void Preprocess(WorkspaceSet *workspaces, const OBJ *object) const {}
318 
319   // Appends features computed from the object and focus to the result.  The
320   // default implementation delegates to Compute(), adding a single value if
321   // available.  Multi-valued feature functions must override this method.
Evaluate(const WorkspaceSet & workspaces,const OBJ & object,ARGS...args,FeatureVector * result)322   virtual void Evaluate(const WorkspaceSet &workspaces, const OBJ &object,
323                         ARGS... args, FeatureVector *result) const {
324     FeatureValue value = Compute(workspaces, object, args...);
325     if (value != kNone) result->add(feature_type(), value);
326   }
327 
328   // Returns a feature value computed from the object and focus, or kNone if no
329   // value is computed.  Single-valued feature functions only need to override
330   // this method.
Compute(const WorkspaceSet & workspaces,const OBJ & object,ARGS...args)331   virtual FeatureValue Compute(const WorkspaceSet &workspaces,
332                                const OBJ &object, ARGS... args) const {
333     return kNone;
334   }
335 
336   // Instantiates a new feature function in a feature extractor from a feature
337   // descriptor.
338   //
339   // Returns a pointer to the newly-created object if everything goes well.
340   // Returns nullptr if the feature function could not be instantiated (e.g., if
341   // the function with that name is not registered; this usually happens because
342   // the relevant cc_library was not linked-in).
Instantiate(const GenericFeatureExtractor * extractor,const FeatureFunctionDescriptor * fd,absl::string_view prefix)343   static Self *Instantiate(const GenericFeatureExtractor *extractor,
344                            const FeatureFunctionDescriptor *fd,
345                            absl::string_view prefix) {
346     Self *f = Self::Create(fd->type());
347     if (f != nullptr) {
348       f->set_extractor(extractor);
349       f->set_descriptor(fd);
350       f->set_prefix(prefix);
351     }
352     return f;
353   }
354 
355  private:
356   // Special feature function class for resolving variable references. The type
357   // of the feature function is used for resolving the variable reference. When
358   // evaluated it will either get the feature value(s) from the variable portion
359   // of the feature vector, if present, or otherwise it will call the referenced
360   // feature extractor function directly to extract the feature(s).
361   class Reference;
362 };
363 
364 // Base class for features with nested feature functions. The nested functions
365 // are of type NES, which may be different from the type of the parent function.
366 // NB: NestedFeatureFunction will ensure that all initialization of nested
367 // functions takes place during Setup() and Init() -- after the nested features
368 // are initialized, the parent feature is initialized via SetupNested() and
369 // InitNested(). Alternatively, a derived classes that overrides Setup() and
370 // Init() directly should call Parent::Setup(), Parent::Init(), etc. first.
371 //
372 // Note: NestedFeatureFunction cannot know how to call Preprocess, Evaluate, or
373 // Compute, since the nested functions may be of a different type.
374 template <class NES, class OBJ, class... ARGS>
375 class NestedFeatureFunction : public FeatureFunction<OBJ, ARGS...> {
376  public:
377   using Parent = NestedFeatureFunction<NES, OBJ, ARGS...>;
378 
379   // Clean up nested functions.
~NestedFeatureFunction()380   ~NestedFeatureFunction() override { utils::STLDeleteElements(&nested_); }
381 
382   // By default, just appends the nested feature types.
GetFeatureTypes(std::vector<FeatureType * > * types)383   void GetFeatureTypes(std::vector<FeatureType *> *types) const override {
384     SAFTM_CHECK(!this->nested().empty())
385         << "Nested features require nested features to be defined.";
386     for (auto *function : nested_) function->GetFeatureTypes(types);
387   }
388 
389   // Sets up the nested features.
390   //
391   // Returns true on success, false otherwise.
Setup(TaskContext * context)392   SAFTM_MUST_USE_RESULT bool Setup(TaskContext *context) override {
393     bool success = CreateNested(this->extractor(), this->descriptor(), &nested_,
394                                 this->SubPrefix());
395     if (!success) return false;
396     for (auto *function : nested_) {
397       if (!function->Setup(context)) return false;
398     }
399     if (!SetupNested(context)) return false;
400     return true;
401   }
402 
403   // Sets up this NestedFeatureFunction specifically.
404   //
405   // Returns true on success, false otherwise.
SetupNested(TaskContext * context)406   SAFTM_MUST_USE_RESULT virtual bool SetupNested(TaskContext *context) {
407     return true;
408   }
409 
410   // Initializes the nested features.
411   //
412   // Returns true on success, false otherwise.
Init(TaskContext * context)413   SAFTM_MUST_USE_RESULT bool Init(TaskContext *context) override {
414     for (auto *function : nested_) {
415       if (!function->Init(context)) return false;
416     }
417     if (!InitNested(context)) return false;
418     return true;
419   }
420 
421   // Initializes this NestedFeatureFunction specifically.
422   //
423   // Returns true on success, false otherwise.
InitNested(TaskContext * context)424   SAFTM_MUST_USE_RESULT virtual bool InitNested(TaskContext *context) {
425     return true;
426   }
427 
428   // Gets all the workspaces needed for the nested functions.
RequestWorkspaces(WorkspaceRegistry * registry)429   void RequestWorkspaces(WorkspaceRegistry *registry) override {
430     for (auto *function : nested_) function->RequestWorkspaces(registry);
431   }
432 
433   // Returns the list of nested feature functions.
nested()434   const std::vector<NES *> &nested() const { return nested_; }
435 
436   // Instantiates nested feature functions for a feature function. Creates and
437   // initializes one feature function for each sub-descriptor in the feature
438   // descriptor.
439   //
440   // Returns true on success, false otherwise.
CreateNested(const GenericFeatureExtractor * extractor,const FeatureFunctionDescriptor * fd,std::vector<NES * > * functions,absl::string_view prefix)441   SAFTM_MUST_USE_RESULT static bool CreateNested(
442       const GenericFeatureExtractor *extractor,
443       const FeatureFunctionDescriptor *fd, std::vector<NES *> *functions,
444       absl::string_view prefix) {
445     for (int i = 0; i < fd->feature_size(); ++i) {
446       const FeatureFunctionDescriptor &sub = fd->feature(i);
447       NES *f = NES::Instantiate(extractor, &sub, prefix);
448       if (f == nullptr) return false;
449       functions->push_back(f);
450     }
451     return true;
452   }
453 
454  protected:
455   // The nested feature functions, if any, in order of declaration in the
456   // feature descriptor.  Owned.
457   std::vector<NES *> nested_;
458 };
459 
460 // Base class for a nested feature function that takes nested features with the
461 // same signature as these features, i.e. a meta feature. For this class, we can
462 // provide preprocessing of the nested features.
463 template <class OBJ, class... ARGS>
464 class MetaFeatureFunction
465     : public NestedFeatureFunction<FeatureFunction<OBJ, ARGS...>, OBJ,
466                                    ARGS...> {
467  public:
468   // Preprocesses using the nested features.
Preprocess(WorkspaceSet * workspaces,const OBJ * object)469   void Preprocess(WorkspaceSet *workspaces, const OBJ *object) const override {
470     for (auto *function : this->nested_) {
471       function->Preprocess(workspaces, object);
472     }
473   }
474 };
475 
476 // Template for a special type of locator: The locator of type
477 // FeatureFunction<OBJ, ARGS...> calls nested functions of type
478 // FeatureFunction<OBJ, IDX, ARGS...>, where the derived class DER is
479 // responsible for translating by providing the following:
480 //
481 // // Gets the new additional focus.
// IDX GetFocus(const WorkspaceSet &workspaces, const OBJ &object,
//              ARGS... args) const;
483 //
484 // This is useful to e.g. add a token focus to a parser state based on some
485 // desired property of that state.
486 template <class DER, class OBJ, class IDX, class... ARGS>
487 class FeatureAddFocusLocator
488     : public NestedFeatureFunction<FeatureFunction<OBJ, IDX, ARGS...>, OBJ,
489                                    ARGS...> {
490  public:
Preprocess(WorkspaceSet * workspaces,const OBJ * object)491   void Preprocess(WorkspaceSet *workspaces, const OBJ *object) const override {
492     for (auto *function : this->nested_) {
493       function->Preprocess(workspaces, object);
494     }
495   }
496 
Evaluate(const WorkspaceSet & workspaces,const OBJ & object,ARGS...args,FeatureVector * result)497   void Evaluate(const WorkspaceSet &workspaces, const OBJ &object, ARGS... args,
498                 FeatureVector *result) const override {
499     IDX focus =
500         static_cast<const DER *>(this)->GetFocus(workspaces, object, args...);
501     for (auto *function : this->nested()) {
502       function->Evaluate(workspaces, object, focus, args..., result);
503     }
504   }
505 
506   // Returns the first nested feature's computed value.
Compute(const WorkspaceSet & workspaces,const OBJ & object,ARGS...args)507   FeatureValue Compute(const WorkspaceSet &workspaces, const OBJ &object,
508                        ARGS... args) const override {
509     IDX focus =
510         static_cast<const DER *>(this)->GetFocus(workspaces, object, args...);
511     return this->nested()[0]->Compute(workspaces, object, focus, args...);
512   }
513 };
514 
515 // CRTP feature locator class. This is a meta feature that modifies ARGS and
516 // then calls the nested feature functions with the modified ARGS. Note that in
517 // order for this template to work correctly, all of ARGS must be types for
518 // which the reference operator & can be interpreted as a pointer to the
// argument. The derived class DER must implement the UpdateArgs method which
// takes pointers to the ARGS arguments:
//
// // Updates the current arguments.
// void UpdateArgs(const WorkspaceSet &workspaces, const OBJ &object,
//                 ARGS *...args) const;
524 template <class DER, class OBJ, class... ARGS>
525 class FeatureLocator : public MetaFeatureFunction<OBJ, ARGS...> {
526  public:
527   // Feature locators have an additional check that there is no intrinsic type.
GetFeatureTypes(std::vector<FeatureType * > * types)528   void GetFeatureTypes(std::vector<FeatureType *> *types) const override {
529     SAFTM_CHECK_EQ(this->feature_type(), nullptr)
530         << "FeatureLocators should not have an intrinsic type.";
531     MetaFeatureFunction<OBJ, ARGS...>::GetFeatureTypes(types);
532   }
533 
534   // Evaluates the locator.
Evaluate(const WorkspaceSet & workspaces,const OBJ & object,ARGS...args,FeatureVector * result)535   void Evaluate(const WorkspaceSet &workspaces, const OBJ &object, ARGS... args,
536                 FeatureVector *result) const override {
537     static_cast<const DER *>(this)->UpdateArgs(workspaces, object, &args...);
538     for (auto *function : this->nested()) {
539       function->Evaluate(workspaces, object, args..., result);
540     }
541   }
542 
543   // Returns the first nested feature's computed value.
Compute(const WorkspaceSet & workspaces,const OBJ & object,ARGS...args)544   FeatureValue Compute(const WorkspaceSet &workspaces, const OBJ &object,
545                        ARGS... args) const override {
546     static_cast<const DER *>(this)->UpdateArgs(workspaces, object, &args...);
547     return this->nested()[0]->Compute(workspaces, object, args...);
548   }
549 };
550 
551 // Feature extractor for extracting features from objects of a certain class.
552 // Template type parameters are as defined for FeatureFunction.
553 template <class OBJ, class... ARGS>
554 class FeatureExtractor : public GenericFeatureExtractor {
555  public:
556   // Feature function type for top-level functions in the feature extractor.
557   typedef FeatureFunction<OBJ, ARGS...> Function;
558   typedef FeatureExtractor<OBJ, ARGS...> Self;
559 
560   // Feature locator type for the feature extractor.
561   template <class DER>
562   using Locator = FeatureLocator<DER, OBJ, ARGS...>;
563 
564   // Initializes feature extractor.
FeatureExtractor()565   FeatureExtractor() {}
566 
~FeatureExtractor()567   ~FeatureExtractor() override { utils::STLDeleteElements(&functions_); }
568 
569   // Sets up the feature extractor. Note that only top-level functions exist
570   // until Setup() is called. This does not take ownership over the context,
571   // which must outlive this.
572   //
573   // Returns true on success, false otherwise.
Setup(TaskContext * context)574   SAFTM_MUST_USE_RESULT bool Setup(TaskContext *context) {
575     for (Function *function : functions_) {
576       if (!function->Setup(context)) return false;
577     }
578     return true;
579   }
580 
581   // Initializes the feature extractor.  Must be called after Setup().  This
582   // does not take ownership over the context, which must outlive this.
583   //
584   // Returns true on success, false otherwise.
Init(TaskContext * context)585   SAFTM_MUST_USE_RESULT bool Init(TaskContext *context) {
586     for (Function *function : functions_) {
587       if (!function->Init(context)) return false;
588     }
589     if (!this->InitializeFeatureTypes()) return false;
590     return true;
591   }
592 
593   // Requests workspaces from the registry. Must be called after Init(), and
594   // before Preprocess(). Does not take ownership over registry. This should be
595   // the same registry used to initialize the WorkspaceSet used in Preprocess()
596   // and ExtractFeatures(). NB: This is a different ordering from that used in
597   // SentenceFeatureRepresentation style feature computation.
RequestWorkspaces(WorkspaceRegistry * registry)598   void RequestWorkspaces(WorkspaceRegistry *registry) {
599     for (auto *function : functions_) function->RequestWorkspaces(registry);
600   }
601 
602   // Preprocesses the object using feature functions for the phase.  Must be
603   // called before any calls to ExtractFeatures() on that object and phase.
Preprocess(WorkspaceSet * workspaces,const OBJ * object)604   void Preprocess(WorkspaceSet *workspaces, const OBJ *object) const {
605     for (Function *function : functions_) {
606       function->Preprocess(workspaces, object);
607     }
608   }
609 
610   // Extracts features from an object with a focus. This invokes all the
611   // top-level feature functions in the feature extractor. Only feature
612   // functions belonging to the specified phase are invoked.
ExtractFeatures(const WorkspaceSet & workspaces,const OBJ & object,ARGS...args,FeatureVector * result)613   void ExtractFeatures(const WorkspaceSet &workspaces, const OBJ &object,
614                        ARGS... args, FeatureVector *result) const {
615     result->reserve(this->feature_types());
616 
617     // Extract features.
618     for (size_t i = 0; i < functions_.size(); ++i) {
619       functions_[i]->Evaluate(workspaces, object, args..., result);
620     }
621   }
622 
623  private:
624   // Creates and initializes all feature functions in the feature extractor.
625   //
626   // Returns true on success, false otherwise.
InitializeFeatureFunctions()627   SAFTM_MUST_USE_RESULT bool InitializeFeatureFunctions() override {
628     // Create all top-level feature functions.
629     for (int i = 0; i < descriptor().feature_size(); ++i) {
630       const FeatureFunctionDescriptor &fd = descriptor().feature(i);
631       Function *function = Function::Instantiate(this, &fd, "");
632       if (function == nullptr) return false;
633       functions_.push_back(function);
634     }
635     return true;
636   }
637 
638   // Collect all feature types used in the feature extractor.
GetFeatureTypes(std::vector<FeatureType * > * types)639   void GetFeatureTypes(std::vector<FeatureType *> *types) const override {
640     for (size_t i = 0; i < functions_.size(); ++i) {
641       functions_[i]->GetFeatureTypes(types);
642     }
643   }
644 
645   // Top-level feature functions (and variables) in the feature extractor.
646   // Owned.
647   std::vector<Function *> functions_;
648 };
649 
650 }  // namespace mobile
}  // namespace libtextclassifier3
652 
653 #endif  // NLP_SAFT_COMPONENTS_COMMON_MOBILE_FEL_FEATURE_EXTRACTOR_H_
654