xref: /aosp_15_r20/external/tensorflow/tensorflow/compiler/xla/service/hlo_lexer.h (revision b6fb3261f9314811a0f4371741dbb8839866f948)
1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_HLO_LEXER_H_
17 #define TENSORFLOW_COMPILER_XLA_SERVICE_HLO_LEXER_H_
18 
#include <cstdint>
#include <optional>
#include <string>
#include <utility>

#include "absl/strings/string_view.h"
#include "tensorflow/compiler/xla/shape.h"
#include "tensorflow/compiler/xla/types.h"
#include "tensorflow/compiler/xla/xla_data.pb.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/regexp.h"
27 
28 namespace xla {
29 
// Defines different kinds of tokens used by the HLO lexer.
//
// You shouldn't need to use this directly unless you're using HloLexer
// directly, and you probably don't need to do that.  Use hlo_parser instead.
//
// NOTE: enumerators have no explicit values, so their numeric value is their
// position in this list. Append or reorder with care if anything depends on
// the numeric values.
enum class TokKind {
  // Markers
  kEof,
  kError,

  // Tokens with no info: the kind alone identifies the exact spelling, so no
  // value is stored for them.
  kEqual,     // =
  kComma,     // ,
  kColon,     // :
  kAsterisk,  // *
  kLsquare,   // [
  kRsquare,   // ]
  kLbrace,    // {
  kRbrace,    // }
  kLparen,    // (
  kRparen,    // )
  kDots,      // ...

  kArrow,  // ->
  kLeq,    // <=

  // Keywords
  // NOTE(review): the source spelling of each keyword presumably matches the
  // part of the enumerator after `kw_`; confirm against the lexer
  // implementation.
  kw_HloModule,
  kw_ENTRY,
  kw_ROOT,
  kw_true,
  kw_false,
  kw_maximal,
  kw_replicated,
  kw_manual,
  kw_last_tile_dim_replicate,
  kw_inf,

  kNegInf,  // -inf

  // Typed tokens. These carry a value that is read back through the matching
  // HloLexer accessor: GetPrimitiveTypeVal() for kPrimitiveType, GetInt64Val()
  // for kInt, GetDecimalVal() for kDecimal, and GetStrVal() for the rest.
  kPrimitiveType,  // F32, PRED, etc.
  kName,           // %foo
  kAttributeName,  // dimensions=
  kDimLabels,      // [0-9bf?]{2,}_[0-9io?]{2,}->[0-9bf?]{2,}
  kDxD,            // [0-9]+(x[0-9]+)+
  kPad,            // [0-9]+_[0-9]+(_[0-9]+)?(x[0-9]+_[0-9]+(_[0-9]+)?)*
  kIdent,          // other identifiers
  kString,         // "abcd\"\n"
  kInt,            // 42
  kDecimal,        // 4.2
};
81 
// Returns a string for the given token kind. HloLexer streams the result into
// its CHECK-failure messages; the definition lives outside this header.
std::string TokKindToString(TokKind kind);
83 
84 // Lexer for the HloModule::ToString() format text.
85 //
86 // This class is meant to be used by hlo_parser.cc.  You shouldn't need to use
87 // it directly.
88 class HloLexer {
89  public:
HloLexer(absl::string_view buf)90   explicit HloLexer(absl::string_view buf) : buf_(buf) {
91     current_ptr_ = buf_.data();
92   }
93 
Lex()94   TokKind Lex() { return token_state_.current_kind = LexToken(); }
95 
GetKind()96   TokKind GetKind() const { return token_state_.current_kind; }
GetStrVal()97   std::string GetStrVal() const {
98     switch (GetKind()) {
99       case TokKind::kName:
100       case TokKind::kAttributeName:
101       case TokKind::kDimLabels:
102       case TokKind::kDxD:
103       case TokKind::kPad:
104       case TokKind::kString:
105       case TokKind::kIdent:
106         return token_state_.str_val;
107       default:
108         LOG(FATAL) << "This token does not have string value";
109     }
110   }
GetInt64Val()111   int64_t GetInt64Val() const {
112     CHECK(GetKind() == TokKind::kInt) << TokKindToString(GetKind());
113     return token_state_.int64_val;
114   }
GetDecimalVal()115   double GetDecimalVal() const {
116     CHECK(GetKind() == TokKind::kDecimal);
117     return token_state_.decimal_val;
118   }
GetPrimitiveTypeVal()119   PrimitiveType GetPrimitiveTypeVal() const {
120     CHECK(GetKind() == TokKind::kPrimitiveType);
121     return token_state_.primitive_type_val;
122   }
123 
124   typedef const char* LocTy;
125 
126   // Returns the location of the current token.
GetLoc()127   LocTy GetLoc() const { return token_state_.token_start; }
128 
129   // Returns the line and column of a location in the buffer.
130   std::pair<unsigned, unsigned> GetLineAndColumn(LocTy location) const;
131 
132   // Returns the whole line given the location.
133   absl::string_view GetLine(LocTy loc) const;
134 
135   // Looks ahead one token and returns it. Lexer state is unchanged.
136   TokKind LookAhead();
137 
138  private:
139   // Returns the current character. If it's neither the end of input buffer nor
140   // an invalid character, moves the pointer forward.
141   int GetNextChar();
142 
143   // Returns the current character.
144   int PeekCurrentChar() const;
145 
146   // Creates string_view with the given begin and end. Exits if the begin > end,
147   // or it's out of the range of the current buffer.
148   absl::string_view StringViewFromPointers(const char* begin,
149                                            const char* end) const;
150 
151   // Returns true if the given ptr is dereferenceable within the range of the
152   // current buffer.
153   bool CanDereference(const char* ptr) const;
154 
155   TokKind LexToken();
156 
157   TokKind LexIdentifier();
158   TokKind LexPercent();
159   TokKind LexShape();
160   TokKind LexConstant();
161   TokKind LexNumberOrPattern();
162   TokKind LexString();
163 
164   std::optional<int64_t> LexNanPayload(absl::string_view& consumable);
165 
166   absl::string_view buf_;
167   const char* current_ptr_;
168 
169   // Information about the current token.
170   struct TokenState {
171     const char* token_start = nullptr;
172     TokKind current_kind;
173     std::string str_val;
174     int64_t int64_val;
175     double decimal_val;
176     PrimitiveType primitive_type_val;
177   };
178   TokenState token_state_;
179 
180   struct LineNoCacheTy {
181     const char* last_query;
182     unsigned line_no_of_query;
183   };
184   // This caches the line number of the previous query.
185   mutable LineNoCacheTy line_no_cache_{nullptr, 0};
186 };
187 
188 }  // namespace xla
189 
190 #endif  // TENSORFLOW_COMPILER_XLA_SERVICE_HLO_LEXER_H_
191