1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_HLO_LEXER_H_ 17 #define TENSORFLOW_COMPILER_XLA_SERVICE_HLO_LEXER_H_ 18 19 #include <string> 20 21 #include "absl/strings/string_view.h" 22 #include "tensorflow/compiler/xla/shape.h" 23 #include "tensorflow/compiler/xla/types.h" 24 #include "tensorflow/compiler/xla/xla_data.pb.h" 25 #include "tensorflow/core/platform/logging.h" 26 #include "tensorflow/core/platform/regexp.h" 27 28 namespace xla { 29 30 // Defines different kinds of tokens used by the HLO lexer. 31 // 32 // You shouldn't need to use this directly unless you're using HloLexer 33 // directly, and you probably don't need to do that. Use hlo_parser instead. 34 enum class TokKind { 35 // Markers 36 kEof, 37 kError, 38 39 // Tokens with no info. 40 kEqual, // = 41 kComma, // , 42 kColon, // : 43 kAsterisk, // * 44 kLsquare, 45 kRsquare, // [ ] 46 kLbrace, 47 kRbrace, // { } 48 kLparen, 49 kRparen, // ( ) 50 kDots, // ... 51 52 kArrow, // -> 53 kLeq, // <= 54 55 // Keywords 56 kw_HloModule, 57 kw_ENTRY, 58 kw_ROOT, 59 kw_true, 60 kw_false, 61 kw_maximal, 62 kw_replicated, 63 kw_manual, 64 kw_last_tile_dim_replicate, 65 kw_inf, 66 67 kNegInf, // -inf 68 69 // Typed tokens. 70 kPrimitiveType, // F32, PRED, etc. 71 kName, // %foo 72 kAttributeName, // dimensions= 73 kDimLabels, // [0-9bf?]{2,}_[0-9io?]{2,}->[0-9bf?]{2,} 74 kDxD, // [0-9]+(x[0-9]+)+ 75 kPad, // [0-9]+_[0-9]+(_[0-9]+)?(x[0-9]+_[0-9]+(_[0-9]+)?)* 76 kIdent, // other identifiers 77 kString, // "abcd\"\n" 78 kInt, // 42 79 kDecimal, // 4.2 80 }; 81 82 std::string TokKindToString(TokKind kind); 83 84 // Lexer for the HloModule::ToString() format text. 85 // 86 // This class is meant to be used by hlo_parser.cc. You shouldn't need to use 87 // it directly. 88 class HloLexer { 89 public: HloLexer(absl::string_view buf)90 explicit HloLexer(absl::string_view buf) : buf_(buf) { 91 current_ptr_ = buf_.data(); 92 } 93 Lex()94 TokKind Lex() { return token_state_.current_kind = LexToken(); } 95 GetKind()96 TokKind GetKind() const { return token_state_.current_kind; } GetStrVal()97 std::string GetStrVal() const { 98 switch (GetKind()) { 99 case TokKind::kName: 100 case TokKind::kAttributeName: 101 case TokKind::kDimLabels: 102 case TokKind::kDxD: 103 case TokKind::kPad: 104 case TokKind::kString: 105 case TokKind::kIdent: 106 return token_state_.str_val; 107 default: 108 LOG(FATAL) << "This token does not have string value"; 109 } 110 } GetInt64Val()111 int64_t GetInt64Val() const { 112 CHECK(GetKind() == TokKind::kInt) << TokKindToString(GetKind()); 113 return token_state_.int64_val; 114 } GetDecimalVal()115 double GetDecimalVal() const { 116 CHECK(GetKind() == TokKind::kDecimal); 117 return token_state_.decimal_val; 118 } GetPrimitiveTypeVal()119 PrimitiveType GetPrimitiveTypeVal() const { 120 CHECK(GetKind() == TokKind::kPrimitiveType); 121 return token_state_.primitive_type_val; 122 } 123 124 typedef const char* LocTy; 125 126 // Returns the location of the current token. GetLoc()127 LocTy GetLoc() const { return token_state_.token_start; } 128 129 // Returns the line and column of a location in the buffer. 130 std::pair<unsigned, unsigned> GetLineAndColumn(LocTy location) const; 131 132 // Returns the whole line given the location. 133 absl::string_view GetLine(LocTy loc) const; 134 135 // Looks ahead one token and returns it. Lexer state is unchanged. 136 TokKind LookAhead(); 137 138 private: 139 // Returns the current character. If it's neither the end of input buffer nor 140 // an invalid character, moves the pointer forward. 141 int GetNextChar(); 142 143 // Returns the current character. 144 int PeekCurrentChar() const; 145 146 // Creates string_view with the given begin and end. Exits if the begin > end, 147 // or it's out of the range of the current buffer. 148 absl::string_view StringViewFromPointers(const char* begin, 149 const char* end) const; 150 151 // Returns true if the given ptr is dereferenceable within the range of the 152 // current buffer. 153 bool CanDereference(const char* ptr) const; 154 155 TokKind LexToken(); 156 157 TokKind LexIdentifier(); 158 TokKind LexPercent(); 159 TokKind LexShape(); 160 TokKind LexConstant(); 161 TokKind LexNumberOrPattern(); 162 TokKind LexString(); 163 164 std::optional<int64_t> LexNanPayload(absl::string_view& consumable); 165 166 absl::string_view buf_; 167 const char* current_ptr_; 168 169 // Information about the current token. 170 struct TokenState { 171 const char* token_start = nullptr; 172 TokKind current_kind; 173 std::string str_val; 174 int64_t int64_val; 175 double decimal_val; 176 PrimitiveType primitive_type_val; 177 }; 178 TokenState token_state_; 179 180 struct LineNoCacheTy { 181 const char* last_query; 182 unsigned line_no_of_query; 183 }; 184 // This caches the line number of the previous query. 185 mutable LineNoCacheTy line_no_cache_{nullptr, 0}; 186 }; 187 188 } // namespace xla 189 190 #endif // TENSORFLOW_COMPILER_XLA_SERVICE_HLO_LEXER_H_ 191