# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Parsing Ops."""
from tensorflow.python.framework import ops
from tensorflow.python.framework import sparse_tensor
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import gen_parsing_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import parsing_config
# go/tf-wildcard-import
# pylint: disable=wildcard-import,undefined-variable
from tensorflow.python.ops.gen_parsing_ops import *
# pylint: enable=wildcard-import,undefined-variable
from tensorflow.python.util import deprecation
from tensorflow.python.util import dispatch
from tensorflow.python.util.tf_export import tf_export


ops.NotDifferentiable("DecodeRaw")
ops.NotDifferentiable("DecodePaddedRaw")
ops.NotDifferentiable("ParseTensor")
ops.NotDifferentiable("SerializeTensor")
ops.NotDifferentiable("StringToNumber")


VarLenFeature = parsing_config.VarLenFeature
RaggedFeature = parsing_config.RaggedFeature
SparseFeature = parsing_config.SparseFeature
FixedLenFeature = parsing_config.FixedLenFeature
FixedLenSequenceFeature = parsing_config.FixedLenSequenceFeature
# pylint: disable=protected-access
_ParseOpParams = parsing_config._ParseOpParams
_construct_tensors_for_composite_features = (
    parsing_config._construct_tensors_for_composite_features)
# pylint: enable=protected-access


# TODO(b/122887740) Switch files that use this private symbol to use new name.
_construct_sparse_tensors_for_sparse_features = \
    _construct_tensors_for_composite_features


def _prepend_none_dimension(features):
  """Returns a copy of features with adjusted FixedLenSequenceFeature shapes."""
  if features:
    modified_features = dict(features)  # Create a copy to modify
    for key, feature in features.items():
      if isinstance(feature, FixedLenSequenceFeature):
        if not feature.allow_missing:
          raise ValueError("Unsupported: FixedLenSequenceFeature requires "
                           "allow_missing to be True.")
        modified_features[key] = FixedLenSequenceFeature(
            [None] + list(feature.shape),
            feature.dtype,
            feature.allow_missing,
            feature.default_value)
    return modified_features
  else:
    return features
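

# Illustrative sketch (not executed here): for a `FixedLenSequenceFeature`
# with `allow_missing=True`, `_prepend_none_dimension` prepends a `None`
# (variable-length) leading dimension to the feature's shape, e.g.
#
#   features = {"tokens": FixedLenSequenceFeature([5], dtypes.int64,
#                                                 allow_missing=True)}
#   _prepend_none_dimension(features)
#   # => {"tokens": FixedLenSequenceFeature([None, 5], dtypes.int64,
#   #                                        allow_missing=True)}
#
# The `dtypes` name above refers to `tensorflow.python.framework.dtypes`,
# which is not imported in this module; the snippet is illustrative only.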


@tf_export("io.parse_example", v1=[])
@dispatch.add_dispatch_support
def parse_example_v2(serialized, features, example_names=None, name=None):
  # pylint: disable=line-too-long
  """Parses `Example` protos into a `dict` of tensors.

  Parses a number of serialized [`Example`](https://www.tensorflow.org/code/tensorflow/core/example/example.proto)
  protos given in `serialized`. We refer to `serialized` as a batch with
  `batch_size` many entries of individual `Example` protos.

  `example_names` may contain descriptive names for the corresponding serialized
  protos. These may be useful for debugging purposes, but they have no effect on
  the output. If not `None`, `example_names` must be the same length as
  `serialized`.

  This op parses serialized examples into a dictionary mapping keys to `Tensor`,
  `SparseTensor`, and `RaggedTensor` objects. `features` is a dict from keys to
  `VarLenFeature`, `SparseFeature`, `RaggedFeature`, and `FixedLenFeature`
  objects. Each `VarLenFeature` and `SparseFeature` is mapped to a
  `SparseTensor`; each `FixedLenFeature` is mapped to a `Tensor`; and each
  `RaggedFeature` is mapped to a `RaggedTensor`.

  Each `VarLenFeature` maps to a `SparseTensor` of the specified type
  representing a ragged matrix. Its indices are `[batch, index]` where `batch`
  identifies the example in `serialized`, and `index` is the value's index in
  the list of values associated with that feature and example.

  Each `SparseFeature` maps to a `SparseTensor` of the specified type
  representing a `Tensor` of `dense_shape` `[batch_size] + SparseFeature.size`.
  Its `values` come from the feature in the examples with key `value_key`.
  A `values[i]` comes from a position `k` in the feature of an example at batch
  entry `batch`. This positional information is recorded in `indices[i]` as
  `[batch, index_0, index_1, ...]` where `index_j` is the `k-th` value of
  the feature in the example with key `SparseFeature.index_key[j]`.
  In other words, we split the indices (except the first index indicating the
  batch entry) of a `SparseTensor` by dimension into different features of the
  `Example`. Due to its complexity a `VarLenFeature` should be preferred over a
  `SparseFeature` whenever possible.

  Each `FixedLenFeature` `df` maps to a `Tensor` of the specified type (or
  `tf.float32` if not specified) and shape `(serialized.size(),) + df.shape`.

  `FixedLenFeature` entries with a `default_value` are optional. With no default
  value, we will fail if that `Feature` is missing from any example in
  `serialized`.

  Each `FixedLenSequenceFeature` `df` maps to a `Tensor` of the specified type
  (or `tf.float32` if not specified) and shape
  `(serialized.size(), None) + df.shape`.
  All examples in `serialized` will be padded with `default_value` along the
  second dimension.

  Each `RaggedFeature` maps to a `RaggedTensor` of the specified type. It
  is formed by stacking the `RaggedTensor` for each example, where the
  `RaggedTensor` for each individual example is constructed using the tensors
  specified by `RaggedFeature.value_key` and `RaggedFeature.partitions`. See
  the `tf.io.RaggedFeature` documentation for details and examples.

  Examples:

  For example, if one expects a `tf.float32` `VarLenFeature` `ft` and three
  serialized `Example`s are provided:

  ```
  serialized = [
    features
      { feature { key: "ft" value { float_list { value: [1.0, 2.0] } } } },
    features
      { feature []},
    features
      { feature { key: "ft" value { float_list { value: [3.0] } } } }
  ]
  ```

  then the output will look like:

  ```python
  {"ft": SparseTensor(indices=[[0, 0], [0, 1], [2, 0]],
                      values=[1.0, 2.0, 3.0],
                      dense_shape=(3, 2)) }
  ```

  If instead a `FixedLenSequenceFeature` with `default_value = -1.0` and
  `shape=[]` is used then the output will look like:

  ```python
  {"ft": [[1.0, 2.0], [3.0, -1.0]]}
  ```

  Given two `Example` input protos in `serialized`:

  ```
  [
    features {
      feature { key: "kw" value { bytes_list { value: [ "knit", "big" ] } } }
      feature { key: "gps" value { float_list { value: [] } } }
    },
    features {
      feature { key: "kw" value { bytes_list { value: [ "emmy" ] } } }
      feature { key: "dank" value { int64_list { value: [ 42 ] } } }
      feature { key: "gps" value { } }
    }
  ]
  ```

  And arguments

  ```
  example_names: ["input0", "input1"],
  features: {
      "kw": VarLenFeature(tf.string),
      "dank": VarLenFeature(tf.int64),
      "gps": VarLenFeature(tf.float32),
  }
  ```

  Then the output is a dictionary:

  ```python
  {
    "kw": SparseTensor(
        indices=[[0, 0], [0, 1], [1, 0]],
        values=["knit", "big", "emmy"],
        dense_shape=[2, 2]),
    "dank": SparseTensor(
        indices=[[1, 0]],
        values=[42],
        dense_shape=[2, 1]),
    "gps": SparseTensor(
        indices=[],
        values=[],
        dense_shape=[2, 0]),
  }
  ```

  For dense results in two serialized `Example`s:

  ```
  [
    features {
      feature { key: "age" value { int64_list { value: [ 0 ] } } }
      feature { key: "gender" value { bytes_list { value: [ "f" ] } } }
    },
    features {
      feature { key: "age" value { int64_list { value: [] } } }
      feature { key: "gender" value { bytes_list { value: [ "f" ] } } }
    }
  ]
  ```

  We can use arguments:

  ```
  example_names: ["input0", "input1"],
  features: {
      "age": FixedLenFeature([], dtype=tf.int64, default_value=-1),
      "gender": FixedLenFeature([], dtype=tf.string),
  }
  ```

  And the expected output is:

  ```python
  {
    "age": [[0], [-1]],
    "gender": [["f"], ["f"]],
  }
  ```

  An alternative to `VarLenFeature` to obtain a `SparseTensor` is
  `SparseFeature`.
  For example, given two `Example` input protos in `serialized`:

  ```
  [
    features {
      feature { key: "val" value { float_list { value: [ 0.5, -1.0 ] } } }
      feature { key: "ix" value { int64_list { value: [ 3, 20 ] } } }
    },
    features {
      feature { key: "val" value { float_list { value: [ 0.0 ] } } }
      feature { key: "ix" value { int64_list { value: [ 42 ] } } }
    }
  ]
  ```

  And arguments

  ```
  example_names: ["input0", "input1"],
  features: {
      "sparse": SparseFeature(
          index_key="ix", value_key="val", dtype=tf.float32, size=100),
  }
  ```

  Then the output is a dictionary:

  ```python
  {
    "sparse": SparseTensor(
        indices=[[0, 3], [0, 20], [1, 42]],
        values=[0.5, -1.0, 0.0],
        dense_shape=[2, 100]),
  }
  ```

  See the `tf.io.RaggedFeature` documentation for examples showing how
  `RaggedFeature` can be used to obtain `RaggedTensor`s.

  Args:
    serialized: A vector (1-D Tensor) of strings, a batch of binary
      serialized `Example` protos.
    features: A `dict` mapping feature keys to `FixedLenFeature`,
      `VarLenFeature`, `SparseFeature`, and `RaggedFeature` values.
    example_names: A vector (1-D Tensor) of strings (optional), the names of
      the serialized protos in the batch.
    name: A name for this operation (optional).

  Returns:
    A `dict` mapping feature keys to `Tensor`, `SparseTensor`, and
    `RaggedTensor` values.

  Raises:
    ValueError: if any feature is invalid.
  """
  if not features:
    raise ValueError("Argument `features` cannot be None.")
  features = _prepend_none_dimension(features)
  params = _ParseOpParams.from_features(features, [
      VarLenFeature, SparseFeature, FixedLenFeature, FixedLenSequenceFeature,
      RaggedFeature
  ])

  outputs = _parse_example_raw(serialized, example_names, params, name=name)
  return _construct_tensors_for_composite_features(features, outputs)


@tf_export(v1=["io.parse_example", "parse_example"])
@dispatch.add_dispatch_support
def parse_example(serialized, features, name=None, example_names=None):
  return parse_example_v2(serialized, features, example_names, name)


parse_example.__doc__ = parse_example_v2.__doc__
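

# Illustrative usage (a minimal sketch, not part of this module): building a
# serialized `tf.train.Example` with the public API and parsing it back with
# `tf.io.parse_example`. Assumes TF2 eager execution; `import tensorflow as tf`
# is not available inside this module, so the snippet is shown as a comment.
#
#   import tensorflow as tf
#
#   example = tf.train.Example(features=tf.train.Features(feature={
#       "ft": tf.train.Feature(
#           float_list=tf.train.FloatList(value=[1.0, 2.0])),
#   }))
#   parsed = tf.io.parse_example(
#       serialized=[example.SerializeToString()],
#       features={"ft": tf.io.VarLenFeature(tf.float32)})
#   # parsed["ft"] is a SparseTensor with dense_shape [1, 2] and
#   # values [1.0, 2.0].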


def _parse_example_raw(serialized, names, params, name):
  """Parses `Example` protos.

  Args:
    serialized: A vector (1-D Tensor) of strings, a batch of binary
      serialized `Example` protos.
    names: A vector (1-D Tensor) of strings (optional), the names of
      the serialized protos.
    params: A `ParseOpParams` containing the parameters for the parse op.
    name: A name for this operation (optional).

  Returns:
    A `dict` mapping keys to `Tensor`s, `SparseTensor`s, and `RaggedTensor`s.
  """
  if params.num_features == 0:
    raise ValueError("Must provide at least one feature key.")
  with ops.name_scope(name, "ParseExample", [serialized, names]):
    names = [] if names is None else names
    serialized = ops.convert_to_tensor(serialized, name="serialized")
    if params.ragged_keys and serialized.shape.ndims is None:
      raise ValueError("serialized must have statically-known rank to "
                       "parse ragged features.")
    outputs = gen_parsing_ops.parse_example_v2(
        serialized=serialized,
        names=names,
        sparse_keys=params.sparse_keys,
        dense_keys=params.dense_keys,
        ragged_keys=params.ragged_keys,
        dense_defaults=params.dense_defaults_vec,
        num_sparse=len(params.sparse_keys),
        sparse_types=params.sparse_types,
        ragged_value_types=params.ragged_value_types,
        ragged_split_types=params.ragged_split_types,
        dense_shapes=params.dense_shapes_as_proto,
        name=name)
    (sparse_indices, sparse_values, sparse_shapes, dense_values,
     ragged_values, ragged_row_splits) = outputs
    # pylint: disable=protected-access
    ragged_tensors = parsing_config._build_ragged_tensors(
        serialized.shape, ragged_values, ragged_row_splits)

    sparse_tensors = [
        sparse_tensor.SparseTensor(ix, val, shape) for (ix, val, shape)
        in zip(sparse_indices, sparse_values, sparse_shapes)]

    return dict(
        zip(params.sparse_keys + params.dense_keys + params.ragged_keys,
            sparse_tensors + dense_values + ragged_tensors))


@tf_export(v1=["io.parse_single_example", "parse_single_example"])
@dispatch.add_dispatch_support
def parse_single_example(serialized, features, name=None, example_names=None):
  """Parses a single `Example` proto.

  Similar to `parse_example`, except:

  For dense tensors, the returned `Tensor` is identical to the output of
  `parse_example`, except there is no batch dimension: the output shape is the
  same as the shape given in `dense_shape`.

  For `SparseTensor`s, the first (batch) column of the indices matrix is removed
  (the indices matrix is a column vector), the values vector is unchanged, and
  the first (`batch_size`) entry of the shape vector is removed (it is now a
  single element vector).

  One might see performance advantages by batching `Example` protos with
  `parse_example` instead of using this function directly.

  Args:
    serialized: A scalar string Tensor, a single serialized Example.
    features: A `dict` mapping feature keys to `FixedLenFeature` or
      `VarLenFeature` values.
    name: A name for this operation (optional).
    example_names: (Optional) A scalar string Tensor, the associated name.

  Returns:
    A `dict` mapping feature keys to `Tensor` and `SparseTensor` values.

  Raises:
    ValueError: if any feature is invalid.
  """
  return parse_single_example_v2(serialized, features, example_names, name)


@tf_export("io.parse_single_example", v1=[])
@dispatch.add_dispatch_support
def parse_single_example_v2(serialized, features, example_names=None,
                            name=None):
  """Parses a single `Example` proto.

  Similar to `parse_example`, except:

  For dense tensors, the returned `Tensor` is identical to the output of
  `parse_example`, except there is no batch dimension: the output shape is the
  same as the shape given in `dense_shape`.

  For `SparseTensor`s, the first (batch) column of the indices matrix is removed
  (the indices matrix is a column vector), the values vector is unchanged, and
  the first (`batch_size`) entry of the shape vector is removed (it is now a
  single element vector).

  One might see performance advantages by batching `Example` protos with
  `parse_example` instead of using this function directly.

  Args:
    serialized: A scalar string Tensor, a single serialized Example.
    features: A `dict` mapping feature keys to `FixedLenFeature` or
      `VarLenFeature` values.
    example_names: (Optional) A scalar string Tensor, the associated name.
    name: A name for this operation (optional).

  Returns:
    A `dict` mapping feature keys to `Tensor` and `SparseTensor` values.

  Raises:
    ValueError: if any feature is invalid.
  """
  if not features:
    raise ValueError("Invalid argument: features cannot be None.")
  with ops.name_scope(name, "ParseSingleExample", [serialized, example_names]):
    serialized = ops.convert_to_tensor(serialized, name="serialized")
    serialized = _assert_scalar(serialized, "serialized")
  return parse_example_v2(serialized, features, example_names, name)
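

# Illustrative usage (a minimal sketch, not part of this module): parsing a
# single serialized `Example` with a fixed-length feature. Assumes TF2 eager
# execution and the public `tf` namespace, which is why it is shown as a
# comment rather than executable module code.
#
#   import tensorflow as tf
#
#   example = tf.train.Example(features=tf.train.Features(feature={
#       "age": tf.train.Feature(int64_list=tf.train.Int64List(value=[7])),
#   }))
#   parsed = tf.io.parse_single_example(
#       serialized=example.SerializeToString(),
#       features={"age": tf.io.FixedLenFeature([], tf.int64)})
#   # parsed["age"] is a scalar int64 Tensor equal to 7 (no batch dimension).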


@tf_export("io.parse_sequence_example")
@dispatch.add_dispatch_support
def parse_sequence_example(serialized,
                           context_features=None,
                           sequence_features=None,
                           example_names=None,
                           name=None):
  # pylint: disable=line-too-long
  """Parses a batch of `SequenceExample` protos.

  Parses a vector of serialized
  [`SequenceExample`](https://www.tensorflow.org/code/tensorflow/core/example/example.proto)
  protos given in `serialized`.

  This op parses serialized sequence examples into a tuple of dictionaries,
  each mapping keys to `Tensor` and `SparseTensor` objects.
  The first dictionary contains mappings for keys appearing in
  `context_features`, and the second dictionary contains mappings for keys
  appearing in `sequence_features`.

  At least one of `context_features` and `sequence_features` must be provided
  and non-empty.

  The `context_features` keys are associated with a `SequenceExample` as a
  whole, independent of time / frame. In contrast, the `sequence_features` keys
  provide a way to access variable-length data within the `FeatureList` section
  of the `SequenceExample` proto. While the shapes of `context_features` values
  are fixed with respect to frame, the frame dimension (the first dimension)
  of `sequence_features` values may vary between `SequenceExample` protos,
  and even between `feature_list` keys within the same `SequenceExample`.

  `context_features` contains `VarLenFeature`, `RaggedFeature`, and
  `FixedLenFeature` objects. Each `VarLenFeature` is mapped to a
  `SparseTensor`; each `RaggedFeature` is mapped to a `RaggedTensor`; and each
  `FixedLenFeature` is mapped to a `Tensor`, of the specified type, shape, and
  default value.

  `sequence_features` contains `VarLenFeature`, `RaggedFeature`, and
  `FixedLenSequenceFeature` objects. Each `VarLenFeature` is mapped to a
  `SparseTensor`; each `RaggedFeature` is mapped to a `RaggedTensor`; and
  each `FixedLenSequenceFeature` is mapped to a `Tensor`, each of the specified
  type. The shape will be `(B, T) + df.dense_shape` for
  `FixedLenSequenceFeature` `df`, where `B` is the batch size, and `T` is the
  length of the associated `FeatureList` in the `SequenceExample`.
  For instance, `FixedLenSequenceFeature([])` yields a 2-D `Tensor` of static
  shape `[None, None]` and dynamic shape `[B, T]`, while
  `FixedLenSequenceFeature([k])` (for `int k >= 1`) yields a 3-D `Tensor` of
  static shape `[None, None, k]` and dynamic shape `[B, T, k]`.

  Like the input, the resulting output tensors have a batch dimension. This
  means that the original per-example shapes of `VarLenFeature`s and
  `FixedLenSequenceFeature`s can be lost. To handle that situation, this op also
  provides dicts of shape tensors as part of the output. There is one dict for
  the context features, and one for the feature_list features. Context features
  of type `FixedLenFeature` will not be present, since their shapes are already
  known by the caller. In situations where the input `FixedLenSequenceFeature`s
  are of different sequence lengths across examples, the shorter examples will
  be padded with default datatype values: 0 for numeric types, and the empty
  string for string types.

  Each `SparseTensor` corresponding to `sequence_features` represents a ragged
  vector. Its indices are `[time, index]`, where `time` is the `FeatureList`
  entry and `index` is the value's index in the list of values associated with
  that time.

  `FixedLenFeature` entries with a `default_value` and `FixedLenSequenceFeature`
  entries with `allow_missing=True` are optional; otherwise, we will fail if
  that `Feature` or `FeatureList` is missing from any example in `serialized`.

  `example_names` may contain descriptive names for the corresponding serialized
  protos. These may be useful for debugging purposes, but they have no effect on
  the output. If not `None`, `example_names` must be the same length as
  `serialized`.

  Args:
    serialized: A vector (1-D Tensor) of type string containing binary
      serialized `SequenceExample` protos.
    context_features: A `dict` mapping feature keys to `FixedLenFeature` or
      `VarLenFeature` or `RaggedFeature` values. These features are associated
      with a `SequenceExample` as a whole.
    sequence_features: A `dict` mapping feature keys to
      `FixedLenSequenceFeature` or `VarLenFeature` or `RaggedFeature` values.
      These features are associated with data within the `FeatureList` section
      of the `SequenceExample` proto.
    example_names: A vector (1-D Tensor) of strings (optional), the names of
      the serialized protos.
    name: A name for this operation (optional).

  Returns:
    A tuple of three `dict`s, each mapping keys to `Tensor`s,
    `SparseTensor`s, and `RaggedTensor`s. The first dict contains the context
    key/values, the second dict contains the feature_list key/values, and the
    final dict contains the lengths of any dense feature_list features.

  Raises:
    ValueError: if any feature is invalid.
  """
  if not (context_features or sequence_features):
    raise ValueError("Both `context_features` and `sequence_features` "
                     "arguments are None, but at least one should have "
                     "values.")
  context_params = _ParseOpParams.from_features(
      context_features, [VarLenFeature, FixedLenFeature, RaggedFeature])
  feature_list_params = _ParseOpParams.from_features(
      sequence_features,
      [VarLenFeature, FixedLenSequenceFeature, RaggedFeature])

  with ops.name_scope(name, "ParseSequenceExample",
                      [serialized, example_names]):
    outputs = _parse_sequence_example_raw(serialized, example_names,
                                          context_params, feature_list_params,
                                          name)
    context_output, feature_list_output, feature_list_lengths = outputs

    if context_params.ragged_keys:
      context_output = _construct_tensors_for_composite_features(
          context_features, context_output)
    if feature_list_params.ragged_keys:
      feature_list_output = _construct_tensors_for_composite_features(
          sequence_features, feature_list_output)

    return context_output, feature_list_output, feature_list_lengths
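

# Illustrative usage (a minimal sketch, not part of this module): parsing a
# batch of `SequenceExample` protos with one context feature and one sequence
# feature. Assumes TF2 eager execution and the public `tf` namespace, so it is
# shown as a comment rather than executable module code.
#
#   import tensorflow as tf
#
#   seq_example = tf.train.SequenceExample(
#       context=tf.train.Features(feature={
#           "id": tf.train.Feature(int64_list=tf.train.Int64List(value=[1])),
#       }),
#       feature_lists=tf.train.FeatureLists(feature_list={
#           "tokens": tf.train.FeatureList(feature=[
#               tf.train.Feature(int64_list=tf.train.Int64List(value=[3])),
#               tf.train.Feature(int64_list=tf.train.Int64List(value=[4])),
#           ]),
#       }))
#   context, sequences, lengths = tf.io.parse_sequence_example(
#       serialized=[seq_example.SerializeToString()],
#       context_features={"id": tf.io.FixedLenFeature([], tf.int64)},
#       sequence_features={
#           "tokens": tf.io.FixedLenSequenceFeature([], tf.int64)})
#   # context["id"] has shape [1]; sequences["tokens"] has shape [1, 2];
#   # lengths["tokens"] is [2].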


def _parse_sequence_example_raw(serialized,
                                debug_name,
                                context,
                                feature_list,
                                name=None):
  """Parses a vector of `SequenceExample` protos.

  Args:
    serialized: A vector (1-D Tensor) of type string, containing binary
      serialized `SequenceExample` protos.
    debug_name: A vector (1-D Tensor) of strings (optional), the names of the
      serialized protos.
    context: A `ParseOpParams` containing the parameters for the parse
      op for the context features.
    feature_list: A `ParseOpParams` containing the parameters for the
      parse op for the feature_list features.
    name: A name for this operation (optional).

  Returns:
    A tuple of three `dict`s, each mapping keys to `Tensor`s, `SparseTensor`s,
    and `RaggedTensor`s. The first dict contains the context key/values, the
    second dict contains the feature_list key/values, and the final dict
    contains the lengths of any dense feature_list features.

  Raises:
    TypeError: if feature_list.dense_defaults is not either None or a dict.
  """
  if context.num_features + feature_list.num_features == 0:
    raise ValueError("Must provide at least one feature key.")
  with ops.name_scope(name, "ParseSequenceExample", [serialized]):
    debug_name = [] if debug_name is None else debug_name

    # Internal
    feature_list_dense_missing_assumed_empty = []
    for k, v in feature_list.dense_defaults.items():
      if v is not None:
        raise ValueError("Value feature_list.dense_defaults[%s] must be None" %
                         k)
      feature_list_dense_missing_assumed_empty.append(k)

    has_ragged = context.ragged_keys or feature_list.ragged_keys
    serialized = ops.convert_to_tensor(serialized, name="serialized")
    if has_ragged and serialized.shape.ndims is None:
      raise ValueError("serialized must have statically-known rank to "
                       "parse ragged features.")
    feature_list_dense_missing_assumed_empty_vector = [
        key in feature_list_dense_missing_assumed_empty
        for key in feature_list.dense_keys
    ]
    outputs = gen_parsing_ops.parse_sequence_example_v2(
        # Inputs
        serialized=serialized,
        debug_name=debug_name,
        context_sparse_keys=context.sparse_keys,
        context_dense_keys=context.dense_keys,
        context_ragged_keys=context.ragged_keys,
        feature_list_sparse_keys=feature_list.sparse_keys,
        feature_list_dense_keys=feature_list.dense_keys,
        feature_list_ragged_keys=feature_list.ragged_keys,
        feature_list_dense_missing_assumed_empty=(
            feature_list_dense_missing_assumed_empty_vector),
        context_dense_defaults=context.dense_defaults_vec,
        # Attrs
        Ncontext_sparse=len(context.sparse_keys),
        Nfeature_list_sparse=len(feature_list.sparse_keys),
        Nfeature_list_dense=len(feature_list.dense_keys),
        context_sparse_types=context.sparse_types,
        context_ragged_value_types=context.ragged_value_types,
        context_ragged_split_types=context.ragged_split_types,
        feature_list_dense_types=feature_list.dense_types,
        feature_list_sparse_types=feature_list.sparse_types,
        feature_list_ragged_value_types=feature_list.ragged_value_types,
        feature_list_ragged_split_types=feature_list.ragged_split_types,
        context_dense_shapes=context.dense_shapes_as_proto,
        feature_list_dense_shapes=feature_list.dense_shapes,
        name=name)
    (context_sparse_indices, context_sparse_values, context_sparse_shapes,
     context_dense_values, context_ragged_values, context_ragged_row_splits,
     feature_list_sparse_indices, feature_list_sparse_values,
     feature_list_sparse_shapes, feature_list_dense_values,
     feature_list_dense_lengths, feature_list_ragged_values,
     feature_list_ragged_outer_splits,
     feature_list_ragged_inner_splits) = outputs
    # pylint: disable=protected-access
    context_ragged_tensors = parsing_config._build_ragged_tensors(
        serialized.shape, context_ragged_values, context_ragged_row_splits)
    feature_list_ragged_tensors = parsing_config._build_ragged_tensors(
        serialized.shape, feature_list_ragged_values,
        feature_list_ragged_outer_splits, feature_list_ragged_inner_splits)

    # pylint: disable=g-complex-comprehension
    context_sparse_tensors = [
        sparse_tensor.SparseTensor(ix, val, shape)
        for (ix, val, shape) in zip(context_sparse_indices,
                                    context_sparse_values,
                                    context_sparse_shapes)
    ]

    feature_list_sparse_tensors = [
        sparse_tensor.SparseTensor(ix, val, shape)
        for (ix, val, shape) in zip(feature_list_sparse_indices,
                                    feature_list_sparse_values,
                                    feature_list_sparse_shapes)
    ]
    # pylint: enable=g-complex-comprehension

    context_output = dict(
        zip(
            context.sparse_keys + context.dense_keys + context.ragged_keys,
            context_sparse_tensors + context_dense_values +
            context_ragged_tensors))
    feature_list_output = dict(
        zip(
            feature_list.sparse_keys + feature_list.dense_keys +
            feature_list.ragged_keys, feature_list_sparse_tensors +
            feature_list_dense_values + feature_list_ragged_tensors))
    feature_list_lengths = dict(
        zip(feature_list.dense_keys, feature_list_dense_lengths))

    return (context_output, feature_list_output, feature_list_lengths)


@tf_export("io.parse_single_sequence_example",
           v1=["io.parse_single_sequence_example",
               "parse_single_sequence_example"])
@dispatch.add_dispatch_support
def parse_single_sequence_example(
    serialized, context_features=None, sequence_features=None,
    example_name=None, name=None):
  # pylint: disable=line-too-long
  """Parses a single `SequenceExample` proto.

  Parses a single serialized [`SequenceExample`](https://www.tensorflow.org/code/tensorflow/core/example/example.proto)
  proto given in `serialized`.

  This op parses a serialized sequence example into a tuple of dictionaries,
  each mapping keys to `Tensor` and `SparseTensor` objects.
  The first dictionary contains mappings for keys appearing in
  `context_features`, and the second dictionary contains mappings for keys
  appearing in `sequence_features`.

  At least one of `context_features` and `sequence_features` must be provided
  and non-empty.

  The `context_features` keys are associated with a `SequenceExample` as a
  whole, independent of time / frame. In contrast, the `sequence_features` keys
  provide a way to access variable-length data within the `FeatureList` section
  of the `SequenceExample` proto. While the shapes of `context_features` values
  are fixed with respect to frame, the frame dimension (the first dimension)
  of `sequence_features` values may vary between `SequenceExample` protos,
  and even between `feature_list` keys within the same `SequenceExample`.

  `context_features` contains `VarLenFeature`, `RaggedFeature`, and
  `FixedLenFeature` objects. Each `VarLenFeature` is mapped to a `SparseTensor`;
  each `RaggedFeature` is mapped to a `RaggedTensor`; and each `FixedLenFeature`
  is mapped to a `Tensor`, of the specified type, shape, and default value.

  `sequence_features` contains `VarLenFeature`, `RaggedFeature`, and
  `FixedLenSequenceFeature` objects. Each `VarLenFeature` is mapped to a
  `SparseTensor`; each `RaggedFeature` is mapped to a `RaggedTensor`; and each
  `FixedLenSequenceFeature` is mapped to a `Tensor`, each of the specified type.
  The shape will be `(T,) + df.dense_shape` for `FixedLenSequenceFeature` `df`,
  where `T` is the length of the associated `FeatureList` in the
  `SequenceExample`. For instance, `FixedLenSequenceFeature([])` yields a 1-D
  `Tensor` of static shape `[None]` and dynamic shape `[T]`, while
  `FixedLenSequenceFeature([k])` (for `int k >= 1`) yields a 2-D `Tensor` of
  static shape `[None, k]` and dynamic shape `[T, k]`.

  Each `SparseTensor` corresponding to `sequence_features` represents a ragged
  vector. Its indices are `[time, index]`, where `time` is the `FeatureList`
  entry and `index` is the value's index in the list of values associated with
  that time.

  `FixedLenFeature` entries with a `default_value` and `FixedLenSequenceFeature`
  entries with `allow_missing=True` are optional; otherwise, we will fail if
  that `Feature` or `FeatureList` is missing from any example in `serialized`.

  `example_name` may contain a descriptive name for the corresponding serialized
  proto. This may be useful for debugging purposes, but it has no effect on the
  output. If not `None`, `example_name` must be a scalar.

  Note that the batch version of this function, `tf.parse_sequence_example`,
  is written for better memory efficiency and will be faster on large
  `SequenceExample`s.

  Args:
    serialized: A scalar (0-D Tensor) of type string, a single binary
      serialized `SequenceExample` proto.
    context_features: A `dict` mapping feature keys to `FixedLenFeature` or
      `VarLenFeature` or `RaggedFeature` values. These features are associated
      with a `SequenceExample` as a whole.
    sequence_features: A `dict` mapping feature keys to
      `FixedLenSequenceFeature` or `VarLenFeature` or `RaggedFeature` values.
      These features are associated with data within the `FeatureList` section
      of the `SequenceExample` proto.
    example_name: A scalar (0-D Tensor) of strings (optional), the name of
      the serialized proto.
    name: A name for this operation (optional).

  Returns:
    A tuple of two `dict`s, each mapping keys to `Tensor`s, `SparseTensor`s,
    and `RaggedTensor`s.

    * The first dict contains the context key/values.
    * The second dict contains the feature_list key/values.

  Raises:
    ValueError: if any feature is invalid.
  """
  # pylint: enable=line-too-long
  if not (context_features or sequence_features):
    raise ValueError("Both context_features and sequence_features are None, "
                     "but at least one should have values.")
  context_params = _ParseOpParams.from_features(
      context_features, [VarLenFeature, FixedLenFeature, RaggedFeature])
  feature_list_params = _ParseOpParams.from_features(
      sequence_features,
      [VarLenFeature, FixedLenSequenceFeature, RaggedFeature])

  with ops.name_scope(name, "ParseSingleSequenceExample",
                      [serialized, example_name]):
    context_output, feature_list_output = (
        _parse_single_sequence_example_raw(serialized, context_params,
                                           feature_list_params, example_name,
                                           name))

    if context_params.ragged_keys:
      context_output = _construct_tensors_for_composite_features(
          context_features, context_output)
    if feature_list_params.ragged_keys:
      feature_list_output = _construct_tensors_for_composite_features(
          sequence_features, feature_list_output)

    return context_output, feature_list_output
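

# Illustrative usage (a minimal sketch, not part of this module): the
# single-proto variant of the `tf.io.parse_sequence_example` sketch shown
# earlier in this file. Assumes TF2 eager execution and the public `tf`
# namespace.
#
#   import tensorflow as tf
#
#   # `seq_example` built as in the `tf.io.parse_sequence_example` sketch.
#   context, sequences = tf.io.parse_single_sequence_example(
#       serialized=seq_example.SerializeToString(),
#       context_features={"id": tf.io.FixedLenFeature([], tf.int64)},
#       sequence_features={
#           "tokens": tf.io.FixedLenSequenceFeature([], tf.int64)})
#   # context["id"] is a scalar; sequences["tokens"] has shape [T] (here [2]).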


def _parse_single_sequence_example_raw(serialized,
                                       context,
                                       feature_list,
                                       debug_name,
                                       name=None):
  """Parses a single `SequenceExample` proto.

  Args:
    serialized: A scalar (0-D Tensor) of type string, a single binary
      serialized `SequenceExample` proto.
    context: A `ParseOpParams` containing the parameters for the parse op for
      the context features.
    feature_list: A `ParseOpParams` containing the parameters for the parse op
      for the feature_list features.
    debug_name: A scalar (0-D Tensor) of strings (optional), the name of the
      serialized proto.
    name: A name for this operation (optional).

  Returns:
    A tuple of two `dict`s, each mapping keys to `Tensor`s and `SparseTensor`s.
    The first dict contains the context key/values.
    The second dict contains the feature_list key/values.

  Raises:
    TypeError: if feature_list.dense_defaults is not either None or a dict.
  """
  with ops.name_scope(name, "ParseSingleExample", [serialized, debug_name]):
    serialized = ops.convert_to_tensor(serialized, name="serialized")
    serialized = _assert_scalar(serialized, "serialized")
  return _parse_sequence_example_raw(serialized, debug_name, context,
                                     feature_list, name)[:2]


@tf_export("io.decode_raw", v1=[])
@dispatch.add_dispatch_support
def decode_raw(input_bytes,
               out_type,
               little_endian=True,
               fixed_length=None,
               name=None):
  r"""Convert raw bytes from input tensor into numeric tensors.

  Every component of the input tensor is interpreted as a sequence of bytes.
  These bytes are then decoded as numbers in the format specified by `out_type`.

  >>> tf.io.decode_raw(tf.constant("1"), tf.uint8)
  <tf.Tensor: shape=(1,), dtype=uint8, numpy=array([49], dtype=uint8)>
  >>> tf.io.decode_raw(tf.constant("1,2"), tf.uint8)
  <tf.Tensor: shape=(3,), dtype=uint8, numpy=array([49, 44, 50], dtype=uint8)>

  Note that the rank of the output tensor is always one more than the input one:

  >>> tf.io.decode_raw(tf.constant(["1","2"]), tf.uint8).shape
  TensorShape([2, 1])
  >>> tf.io.decode_raw(tf.constant([["1"],["2"]]), tf.uint8).shape
  TensorShape([2, 1, 1])

  This is because each byte in the input is converted to a new value on the
  output (if output type is `uint8` or `int8`, otherwise chunks of inputs get
  converted to a new value):

  >>> tf.io.decode_raw(tf.constant("123"), tf.uint8)
  <tf.Tensor: shape=(3,), dtype=uint8, numpy=array([49, 50, 51], dtype=uint8)>
  >>> tf.io.decode_raw(tf.constant("1234"), tf.uint8)
  <tf.Tensor: shape=(4,), dtype=uint8, numpy=array([49, 50, 51, 52], ...
  >>> # chunked output
  >>> tf.io.decode_raw(tf.constant("12"), tf.uint16)
  <tf.Tensor: shape=(1,), dtype=uint16, numpy=array([12849], dtype=uint16)>
  >>> tf.io.decode_raw(tf.constant("1234"), tf.uint16)
  <tf.Tensor: shape=(2,), dtype=uint16, numpy=array([12849, 13363], ...
  >>> # int64 output
  >>> tf.io.decode_raw(tf.constant("12345678"), tf.int64)
  <tf.Tensor: ... numpy=array([4050765991979987505])>
  >>> tf.io.decode_raw(tf.constant("1234567887654321"), tf.int64)
  <tf.Tensor: ... numpy=array([4050765991979987505, 3544952156018063160])>

  The operation allows specifying endianness via the `little_endian` parameter.

  >>> tf.io.decode_raw(tf.constant("\x0a\x0b"), tf.int16)
  <tf.Tensor: shape=(1,), dtype=int16, numpy=array([2826], dtype=int16)>
  >>> hex(2826)
  '0xb0a'
  >>> tf.io.decode_raw(tf.constant("\x0a\x0b"), tf.int16, little_endian=False)
  <tf.Tensor: shape=(1,), dtype=int16, numpy=array([2571], dtype=int16)>
  >>> hex(2571)
  '0xa0b'

  If the elements of `input_bytes` are of different length, you must specify
  `fixed_length`:

  >>> tf.io.decode_raw(tf.constant([["1"],["23"]]), tf.uint8, fixed_length=4)
  <tf.Tensor: shape=(2, 1, 4), dtype=uint8, numpy=
  array([[[49, 0, 0, 0]],
         [[50, 51, 0, 0]]], dtype=uint8)>

  If the `fixed_length` value is larger than the length of the `out_type` dtype,
  multiple values are generated:

  >>> tf.io.decode_raw(tf.constant(["1212"]), tf.uint16, fixed_length=4)
  <tf.Tensor: shape=(1, 2), dtype=uint16, numpy=array([[12849, 12849]], ...

  If the input value is larger than `fixed_length`, it is truncated:

  >>> x=''.join([chr(1), chr(2), chr(3), chr(4)])
  >>> tf.io.decode_raw(x, tf.uint16, fixed_length=2)
  <tf.Tensor: shape=(1,), dtype=uint16, numpy=array([513], dtype=uint16)>
  >>> hex(513)
  '0x201'

  If `little_endian` and `fixed_length` are specified, truncation to the fixed
  length occurs before endianness conversion:

  >>> x=''.join([chr(1), chr(2), chr(3), chr(4)])
  >>> tf.io.decode_raw(x, tf.uint16, fixed_length=2, little_endian=False)
  <tf.Tensor: shape=(1,), dtype=uint16, numpy=array([258], dtype=uint16)>
  >>> hex(258)
  '0x102'

  If input values all have the same length, then specifying `fixed_length`
  equal to the size of the strings should not change output:

  >>> x = ["12345678", "87654321"]
  >>> tf.io.decode_raw(x, tf.int16)
  <tf.Tensor: shape=(2, 4), dtype=int16, numpy=
  array([[12849, 13363, 13877, 14391],
         [14136, 13622, 13108, 12594]], dtype=int16)>
  >>> tf.io.decode_raw(x, tf.int16, fixed_length=len(x[0]))
  <tf.Tensor: shape=(2, 4), dtype=int16, numpy=
  array([[12849, 13363, 13877, 14391],
         [14136, 13622, 13108, 12594]], dtype=int16)>

  Args:
    input_bytes:
      Each element of the input Tensor is converted to an array of bytes.

      Currently, this must be a tensor of strings (bytes), although semantically
      the operation should support any input.
    out_type:
      `DType` of the output. Acceptable types are `half`, `float`, `double`,
      `int32`, `uint16`, `uint8`, `int16`, `int8`, `int64`.
    little_endian:
      Whether the `input_bytes` data is in little-endian format. Data will be
      converted into host byte order if necessary.
    fixed_length:
      If set, the first `fixed_length` bytes of each element will be converted.
      Data will be zero-padded or truncated to the specified length.

      `fixed_length` must be a multiple of the size of `out_type`.

      `fixed_length` must be specified if the elements of `input_bytes` are of
      variable length.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` object storing the decoded bytes.
  """
  if fixed_length is not None:
    return gen_parsing_ops.decode_padded_raw(
        input_bytes,
        fixed_length=fixed_length,
        out_type=out_type,
        little_endian=little_endian,
        name=name)
  else:
    return gen_parsing_ops.decode_raw(
        input_bytes, out_type, little_endian=little_endian, name=name)
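

# Illustrative round trip (a minimal sketch, not part of this module): encoding
# a numpy float32 array to raw bytes and recovering it with
# `tf.io.decode_raw`. Assumes TF2 eager execution and that numpy is available;
# shown as a comment because `tf` and `np` are not imported in this module.
#
#   import numpy as np
#   import tensorflow as tf
#
#   original = np.array([1.5, -2.0, 3.25], dtype=np.float32)
#   raw = original.tobytes()  # native byte order, little-endian on most hosts
#   decoded = tf.io.decode_raw(raw, tf.float32)
#   # decoded is a float32 Tensor of shape (3,) equal to `original`.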


@tf_export(v1=["decode_raw", "io.decode_raw"])
@dispatch.add_dispatch_support
@deprecation.deprecated_args(None,
                             "bytes is deprecated, use input_bytes instead",
                             "bytes")
def decode_raw_v1(
    input_bytes=None,
    out_type=None,
    little_endian=True,
    name=None,
    bytes=None  # pylint: disable=redefined-builtin
):
  """Convert raw byte strings into tensors.

  Args:
    input_bytes:
      Each element of the input Tensor is converted to an array of bytes.
    out_type:
      `DType` of the output. Acceptable types are `half`, `float`, `double`,
      `int32`, `uint16`, `uint8`, `int16`, `int8`, `int64`.
    little_endian:
      Whether the `input_bytes` data is in little-endian format. Data will be
      converted into host byte order if necessary.
    name: A name for the operation (optional).
    bytes: Deprecated parameter. Use `input_bytes` instead.

  Returns:
    A `Tensor` object storing the decoded bytes.
  """
  input_bytes = deprecation.deprecated_argument_lookup("input_bytes",
                                                       input_bytes, "bytes",
                                                       bytes)

  # out_type is a required positional argument in the original API, and had to
  # be changed to a keyword argument in order to facilitate the transition from
  # the reserved name `bytes` to `input_bytes`. Ensure it's still set.
  if out_type is None:
    raise ValueError(
        "decode_raw_v1() missing 1 positional argument: 'out_type'")

  return gen_parsing_ops.decode_raw(
      input_bytes, out_type, little_endian=little_endian, name=name)


# Swap `name` and `na_value` for backward compatibility.
@tf_export(v1=["io.decode_csv", "decode_csv"])
@dispatch.add_dispatch_support
@deprecation.deprecated_endpoints("decode_csv")
def decode_csv(records,
               record_defaults,
               field_delim=",",
               use_quote_delim=True,
               name=None,
               na_value="",
               select_cols=None):
  """Convert CSV records to tensors. Each column maps to one tensor.

  RFC 4180 format is expected for the CSV records.
  (https://tools.ietf.org/html/rfc4180)
  Note that we allow leading and trailing spaces with int or float fields.

  Args:
    records: A `Tensor` of type `string`.
      Each string is a record/row in the csv and all records should have
      the same format.
    record_defaults: A list of `Tensor` objects with specific types.
      Acceptable types are `float32`, `float64`, `int32`, `int64`, `string`.
      One tensor per column of the input record, with either a
      scalar default value for that column or an empty vector if the column is
      required.
    field_delim: An optional `string`. Defaults to `","`.
      char delimiter to separate fields in a record.
    use_quote_delim: An optional `bool`. Defaults to `True`.
      If false, treats double quotation marks as regular
      characters inside of the string fields (ignoring RFC 4180, Section 2,
      Bullet 5).
    name: A name for the operation (optional).
    na_value: Additional string to recognize as NA/NaN.
    select_cols: Optional sorted list of column indices to select. If specified,
      only this subset of columns will be parsed and returned.

  Returns:
    A list of `Tensor` objects. Has the same type as `record_defaults`.
    Each tensor will have the same shape as records.

  Raises:
    ValueError: If any of the arguments is malformed.
  """
  return decode_csv_v2(
      records, record_defaults,
      field_delim, use_quote_delim,
      na_value, select_cols, name
      )


@tf_export("io.decode_csv", v1=[])
@dispatch.add_dispatch_support
def decode_csv_v2(records,
                  record_defaults,
                  field_delim=",",
                  use_quote_delim=True,
                  na_value="",
                  select_cols=None,
                  name=None):
  """Convert CSV records to tensors. Each column maps to one tensor.

  RFC 4180 format is expected for the CSV records.
  (https://tools.ietf.org/html/rfc4180)
  Note that we allow leading and trailing spaces with int or float fields.

  Args:
    records: A `Tensor` of type `string`.
      Each string is a record/row in the csv and all records should have
      the same format.
    record_defaults: A list of `Tensor` objects with specific types.
      Acceptable types are `float32`, `float64`, `int32`, `int64`, `string`.
      One tensor per column of the input record, with either a
      scalar default value for that column or an empty vector if the column is
      required.
    field_delim: An optional `string`. Defaults to `","`.
      char delimiter to separate fields in a record.
    use_quote_delim: An optional `bool`. Defaults to `True`.
      If false, treats double quotation marks as regular
      characters inside of the string fields (ignoring RFC 4180, Section 2,
      Bullet 5).
    na_value: Additional string to recognize as NA/NaN.
    select_cols: Optional sorted list of column indices to select. If specified,
      only this subset of columns will be parsed and returned.
    name: A name for the operation (optional).

  Returns:
    A list of `Tensor` objects. Has the same type as `record_defaults`.
    Each tensor will have the same shape as records.

  Raises:
    ValueError: If any of the arguments is malformed.
  """
  if select_cols is not None and any(select_cols[i] >= select_cols[i + 1]
                                     for i in range(len(select_cols) - 1)):
    raise ValueError("select_cols is not strictly increasing.")
  if select_cols is not None and select_cols[0] < 0:
    raise ValueError("select_cols contains negative values.")
  if select_cols is not None and len(select_cols) != len(record_defaults):
    raise ValueError("Length of select_cols and record_defaults do not match.")
  return gen_parsing_ops.decode_csv(
      records=records,
      record_defaults=record_defaults,
      field_delim=field_delim,
      use_quote_delim=use_quote_delim,
      na_value=na_value,
      name=name,
      select_cols=select_cols,
  )
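

# Illustrative usage (a minimal sketch, not part of this module): decoding two
# CSV records with an int column, a float column that falls back to a default,
# and a string column. Assumes TF2 eager execution and the public `tf`
# namespace.
#
#   import tensorflow as tf
#
#   records = tf.constant(["1,2.5,hello", "4,,world"])
#   cols = tf.io.decode_csv(
#       records,
#       record_defaults=[tf.constant([0]),     # int32 column
#                        tf.constant([0.0]),   # float32 column, default 0.0
#                        tf.constant([""])])   # string column, default ""
#   # cols is a list of three tensors, each of shape (2,):
#   #   [1, 4], [2.5, 0.0], [b"hello", b"world"]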


def _assert_scalar(value, name):
  """Asserts that `value` is scalar, and returns `value`."""
  value_rank = value.shape.rank
  if value_rank is None:
    check = control_flow_ops.Assert(
        math_ops.equal(array_ops.rank(value), 0),
        ["Input %s must be a scalar" % name],
        name="%sIsScalar" % name.capitalize())
    result = control_flow_ops.with_dependencies([check],
                                                value,
                                                name="%sDependencies" % name)
    result.set_shape([])
    return result
  elif value_rank == 0:
    return value
  else:
    raise ValueError("Input %s must be a scalar" % name)


@tf_export("io.decode_json_example",
           v1=["decode_json_example", "io.decode_json_example"])
def decode_json_example(json_examples, name=None):
  r"""Convert JSON-encoded Example records to binary protocol buffer strings.

  Note: This is **not** a general purpose JSON parsing op.

  This op converts JSON-serialized `tf.train.Example` (maybe created with
  `json_format.MessageToJson`, following the
  [standard JSON mapping](
  https://developers.google.com/protocol-buffers/docs/proto3#json))
  to a binary-serialized `tf.train.Example` (equivalent to
  `Example.SerializeToString()`) suitable for conversion to tensors with
  `tf.io.parse_example`.

  Here is a `tf.train.Example` proto:

  >>> example = tf.train.Example(
  ...     features=tf.train.Features(
  ...         feature={
  ...             "a": tf.train.Feature(
  ...                 int64_list=tf.train.Int64List(
  ...                     value=[1, 1, 3]))}))

  Here it is converted to JSON:

  >>> from google.protobuf import json_format
  >>> example_json = json_format.MessageToJson(example)
  >>> print(example_json)
  {
    "features": {
      "feature": {
        "a": {
          "int64List": {
            "value": [
              "1",
              "1",
              "3"
            ]
          }
        }
      }
    }
  }

  This op converts the above json string to a binary proto:

  >>> example_binary = tf.io.decode_json_example(example_json)
  >>> example_binary.numpy()
  b'\n\x0f\n\r\n\x01a\x12\x08\x1a\x06\x08\x01\x08\x01\x08\x03'

  The op works on string tensors of any shape:

  >>> tf.io.decode_json_example([
  ...     [example_json, example_json],
  ...     [example_json, example_json]]).shape.as_list()
  [2, 2]

  This resulting binary-string is equivalent to `Example.SerializeToString()`,
  and can be converted to Tensors using `tf.io.parse_example` and related
  functions:

  >>> tf.io.parse_example(
  ...   serialized=[example_binary.numpy(),
  ...               example.SerializeToString()],
  ...   features = {'a': tf.io.FixedLenFeature(shape=[3], dtype=tf.int64)})
  {'a': <tf.Tensor: shape=(2, 3), dtype=int64, numpy=
  array([[1, 1, 3],
         [1, 1, 3]])>}

  Args:
    json_examples: A string tensor containing json-serialized `tf.Example`
      protos.
    name: A name for the op.

  Returns:
    A string Tensor containing the binary-serialized `tf.Example` protos.

  Raises:
    `tf.errors.InvalidArgumentError`: If the JSON could not be converted to a
      `tf.Example`.
  """
  return gen_parsing_ops.decode_json_example(json_examples, name=name)


# Register elementwise ops that don't have Python wrappers.
dispatch.register_unary_elementwise_api(gen_parsing_ops.decode_compressed)
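

# Illustrative usage of the elementwise op registered above (a minimal sketch,
# not part of this module): `tf.io.decode_compressed` inflates compressed byte
# strings. Assumes TF2 eager execution and the public `tf` namespace.
#
#   import zlib
#   import tensorflow as tf
#
#   compressed = tf.constant([zlib.compress(b"hello"),
#                             zlib.compress(b"world")])
#   tf.io.decode_compressed(compressed, compression_type="ZLIB")
#   # => string Tensor of shape (2,) with values [b"hello", b"world"]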