xref: /aosp_15_r20/external/icing/icing/query/advanced_query_parser/parser.h (revision 8b6cd535a057e39b3b86660c4aa06c99747c2136)
1 // Copyright (C) 2022 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef ICING_QUERY_ADVANCED_QUERY_PARSER_PARSER_H_
16 #define ICING_QUERY_ADVANCED_QUERY_PARSER_PARSER_H_
17 
18 #include <memory>
19 #include <vector>
20 
21 #include "icing/text_classifier/lib3/utils/base/statusor.h"
22 #include "icing/query/advanced_query_parser/abstract-syntax-tree.h"
23 #include "icing/query/advanced_query_parser/lexer.h"
24 
25 namespace icing {
26 namespace lib {
27 
28 class Parser {
29  public:
Create(std::vector<Lexer::LexerToken> && lexer_tokens)30   static Parser Create(std::vector<Lexer::LexerToken>&& lexer_tokens) {
31     return Parser(std::move(lexer_tokens));
32   }
33 
34   // Returns:
35   //   On success, pointer to the root node of the AST
36   //   INVALID_ARGUMENT for input that does not conform to the grammar
37   libtextclassifier3::StatusOr<std::unique_ptr<Node>> ConsumeQuery();
38 
39   // Returns:
40   //   On success, pointer to the root node of the AST
41   //   INVALID_ARGUMENT for input that does not conform to the grammar
42   libtextclassifier3::StatusOr<std::unique_ptr<Node>> ConsumeScoring();
43 
44  private:
Parser(std::vector<Lexer::LexerToken> && lexer_tokens)45   explicit Parser(std::vector<Lexer::LexerToken>&& lexer_tokens)
46       : lexer_tokens_(std::move(lexer_tokens)),
47         current_token_(lexer_tokens_.begin()) {}
48 
49   // Match Functions
50   // These functions are used to test whether the current_token matches a member
51   // of the FIRST set of a particular symbol in our grammar.
Match(Lexer::TokenType token_type)52   bool Match(Lexer::TokenType token_type) const {
53     return current_token_ != lexer_tokens_.end() &&
54            current_token_->type == token_type;
55   }
56 
MatchMember()57   bool MatchMember() const { return Match(Lexer::TokenType::TEXT); }
58 
MatchFunction()59   bool MatchFunction() const { return Match(Lexer::TokenType::FUNCTION_NAME); }
60 
MatchComparable()61   bool MatchComparable() const {
62     return Match(Lexer::TokenType::STRING) || MatchMember() || MatchFunction();
63   }
64 
MatchComposite()65   bool MatchComposite() const { return Match(Lexer::TokenType::LPAREN); }
66 
MatchRestriction()67   bool MatchRestriction() const { return MatchComparable(); }
68 
MatchSimple()69   bool MatchSimple() const { return MatchRestriction() || MatchComposite(); }
70 
MatchTerm()71   bool MatchTerm() const {
72     return MatchSimple() || Match(Lexer::TokenType::NOT) ||
73            Match(Lexer::TokenType::MINUS);
74   }
75 
MatchFactor()76   bool MatchFactor() const { return MatchTerm(); }
77 
78   // Consume Functions
79   // These functions attempt to parse the token sequence starting at
80   // current_token_.
81   // Returns INVALID_ARGUMENT if unable to parse the token sequence starting at
82   // current_token_ as that particular grammar symbol. There are no guarantees
83   // about what state current_token and lexer_tokens_ are in when returning an
84   // error.
85   //
86   // Consume functions for terminal symbols. These are the only Consume
87   // functions that will directly modify current_token_.
88   // The Consume functions for terminals will guarantee not to modify
89   // current_token_ and lexer_tokens_ when returning an error.
90   libtextclassifier3::Status Consume(Lexer::TokenType token_type);
91 
92   libtextclassifier3::StatusOr<std::unique_ptr<TextNode>> ConsumeText();
93 
94   libtextclassifier3::StatusOr<std::string> ConsumeFunctionName();
95 
96   libtextclassifier3::StatusOr<std::unique_ptr<StringNode>>
97   ConsumeStringElement();
98 
99   libtextclassifier3::StatusOr<std::string> ConsumeComparator();
100 
101   // Consume functions for non-terminal symbols.
102   libtextclassifier3::StatusOr<std::unique_ptr<MemberNode>> ConsumeMember();
103 
104   libtextclassifier3::StatusOr<std::unique_ptr<FunctionNode>> ConsumeFunction();
105 
106   libtextclassifier3::StatusOr<std::unique_ptr<Node>> ConsumeComparable();
107 
108   libtextclassifier3::StatusOr<std::unique_ptr<Node>> ConsumeComposite();
109 
110   libtextclassifier3::StatusOr<std::vector<std::unique_ptr<Node>>>
111   ConsumeArgs();
112 
113   libtextclassifier3::StatusOr<std::unique_ptr<Node>> ConsumeRestriction();
114 
115   libtextclassifier3::StatusOr<std::unique_ptr<Node>> ConsumeSimple();
116 
117   libtextclassifier3::StatusOr<std::unique_ptr<Node>> ConsumeTerm();
118 
119   libtextclassifier3::StatusOr<std::unique_ptr<Node>> ConsumeFactor();
120 
121   libtextclassifier3::StatusOr<std::unique_ptr<Node>> ConsumeSequence();
122 
123   libtextclassifier3::StatusOr<std::unique_ptr<Node>> ConsumeQueryExpression();
124 
125   libtextclassifier3::StatusOr<std::unique_ptr<Node>> ConsumeMultExpr();
126 
127   libtextclassifier3::StatusOr<std::unique_ptr<Node>>
128   ConsumeScoringExpression();
129 
130   libtextclassifier3::StatusOr<std::unique_ptr<Node>> ConsumeExpression();
131 
132   std::vector<Lexer::LexerToken> lexer_tokens_;
133   std::vector<Lexer::LexerToken>::const_iterator current_token_;
134   Lexer::Language language_ = Lexer::Language::QUERY;
135 };
136 
137 }  // namespace lib
138 }  // namespace icing
139 
140 #endif  // ICING_QUERY_ADVANCED_QUERY_PARSER_PARSER_H_
141