1 // Copyright (C) 2022 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #ifndef ICING_QUERY_ADVANCED_QUERY_PARSER_PARSER_H_ 16 #define ICING_QUERY_ADVANCED_QUERY_PARSER_PARSER_H_ 17 18 #include <memory> 19 #include <vector> 20 21 #include "icing/text_classifier/lib3/utils/base/statusor.h" 22 #include "icing/query/advanced_query_parser/abstract-syntax-tree.h" 23 #include "icing/query/advanced_query_parser/lexer.h" 24 25 namespace icing { 26 namespace lib { 27 28 class Parser { 29 public: Create(std::vector<Lexer::LexerToken> && lexer_tokens)30 static Parser Create(std::vector<Lexer::LexerToken>&& lexer_tokens) { 31 return Parser(std::move(lexer_tokens)); 32 } 33 34 // Returns: 35 // On success, pointer to the root node of the AST 36 // INVALID_ARGUMENT for input that does not conform to the grammar 37 libtextclassifier3::StatusOr<std::unique_ptr<Node>> ConsumeQuery(); 38 39 // Returns: 40 // On success, pointer to the root node of the AST 41 // INVALID_ARGUMENT for input that does not conform to the grammar 42 libtextclassifier3::StatusOr<std::unique_ptr<Node>> ConsumeScoring(); 43 44 private: Parser(std::vector<Lexer::LexerToken> && lexer_tokens)45 explicit Parser(std::vector<Lexer::LexerToken>&& lexer_tokens) 46 : lexer_tokens_(std::move(lexer_tokens)), 47 current_token_(lexer_tokens_.begin()) {} 48 49 // Match Functions 50 // These functions are used to test whether the current_token matches a member 51 // of the FIRST set of a particular symbol in our grammar. Match(Lexer::TokenType token_type)52 bool Match(Lexer::TokenType token_type) const { 53 return current_token_ != lexer_tokens_.end() && 54 current_token_->type == token_type; 55 } 56 MatchMember()57 bool MatchMember() const { return Match(Lexer::TokenType::TEXT); } 58 MatchFunction()59 bool MatchFunction() const { return Match(Lexer::TokenType::FUNCTION_NAME); } 60 MatchComparable()61 bool MatchComparable() const { 62 return Match(Lexer::TokenType::STRING) || MatchMember() || MatchFunction(); 63 } 64 MatchComposite()65 bool MatchComposite() const { return Match(Lexer::TokenType::LPAREN); } 66 MatchRestriction()67 bool MatchRestriction() const { return MatchComparable(); } 68 MatchSimple()69 bool MatchSimple() const { return MatchRestriction() || MatchComposite(); } 70 MatchTerm()71 bool MatchTerm() const { 72 return MatchSimple() || Match(Lexer::TokenType::NOT) || 73 Match(Lexer::TokenType::MINUS); 74 } 75 MatchFactor()76 bool MatchFactor() const { return MatchTerm(); } 77 78 // Consume Functions 79 // These functions attempt to parse the token sequence starting at 80 // current_token_. 81 // Returns INVALID_ARGUMENT if unable to parse the token sequence starting at 82 // current_token_ as that particular grammar symbol. There are no guarantees 83 // about what state current_token and lexer_tokens_ are in when returning an 84 // error. 85 // 86 // Consume functions for terminal symbols. These are the only Consume 87 // functions that will directly modify current_token_. 88 // The Consume functions for terminals will guarantee not to modify 89 // current_token_ and lexer_tokens_ when returning an error. 90 libtextclassifier3::Status Consume(Lexer::TokenType token_type); 91 92 libtextclassifier3::StatusOr<std::unique_ptr<TextNode>> ConsumeText(); 93 94 libtextclassifier3::StatusOr<std::string> ConsumeFunctionName(); 95 96 libtextclassifier3::StatusOr<std::unique_ptr<StringNode>> 97 ConsumeStringElement(); 98 99 libtextclassifier3::StatusOr<std::string> ConsumeComparator(); 100 101 // Consume functions for non-terminal symbols. 102 libtextclassifier3::StatusOr<std::unique_ptr<MemberNode>> ConsumeMember(); 103 104 libtextclassifier3::StatusOr<std::unique_ptr<FunctionNode>> ConsumeFunction(); 105 106 libtextclassifier3::StatusOr<std::unique_ptr<Node>> ConsumeComparable(); 107 108 libtextclassifier3::StatusOr<std::unique_ptr<Node>> ConsumeComposite(); 109 110 libtextclassifier3::StatusOr<std::vector<std::unique_ptr<Node>>> 111 ConsumeArgs(); 112 113 libtextclassifier3::StatusOr<std::unique_ptr<Node>> ConsumeRestriction(); 114 115 libtextclassifier3::StatusOr<std::unique_ptr<Node>> ConsumeSimple(); 116 117 libtextclassifier3::StatusOr<std::unique_ptr<Node>> ConsumeTerm(); 118 119 libtextclassifier3::StatusOr<std::unique_ptr<Node>> ConsumeFactor(); 120 121 libtextclassifier3::StatusOr<std::unique_ptr<Node>> ConsumeSequence(); 122 123 libtextclassifier3::StatusOr<std::unique_ptr<Node>> ConsumeQueryExpression(); 124 125 libtextclassifier3::StatusOr<std::unique_ptr<Node>> ConsumeMultExpr(); 126 127 libtextclassifier3::StatusOr<std::unique_ptr<Node>> 128 ConsumeScoringExpression(); 129 130 libtextclassifier3::StatusOr<std::unique_ptr<Node>> ConsumeExpression(); 131 132 std::vector<Lexer::LexerToken> lexer_tokens_; 133 std::vector<Lexer::LexerToken>::const_iterator current_token_; 134 Lexer::Language language_ = Lexer::Language::QUERY; 135 }; 136 137 } // namespace lib 138 } // namespace icing 139 140 #endif // ICING_QUERY_ADVANCED_QUERY_PARSER_PARSER_H_ 141