# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for sequence_feature_column."""

import os

from absl.testing import parameterized
# This import should be removed in the future since it uses a Keras component
# for unit testing.
from keras.feature_column import dense_features
import numpy as np

from tensorflow.python.client import session
from tensorflow.python.feature_column import feature_column_v2 as fc
from tensorflow.python.feature_column import sequence_feature_column as sfc
from tensorflow.python.feature_column import serialization
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.framework import sparse_tensor
from tensorflow.python.framework import test_util
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import lookup_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import sparse_ops
from tensorflow.python.ops import variables as variables_lib
from tensorflow.python.platform import test


def _initialized_session(config=None):
  sess = session.Session(config=config)
  sess.run(variables_lib.global_variables_initializer())
  sess.run(lookup_ops.tables_initializer())
  return sess


@test_util.run_all_in_graph_and_eager_modes
class ConcatenateContextInputTest(test.TestCase, parameterized.TestCase):
  """Tests the utility fn concatenate_context_input."""

  def test_concatenate_context_input(self):
    seq_input = ops.convert_to_tensor(np.arange(12).reshape(2, 3, 2))
    context_input = ops.convert_to_tensor(np.arange(10).reshape(2, 5))
    seq_input = math_ops.cast(seq_input, dtype=dtypes.float32)
    context_input = math_ops.cast(context_input, dtype=dtypes.float32)
    input_layer = sfc.concatenate_context_input(context_input, seq_input)

    expected = np.array([
        [[0, 1, 0, 1, 2, 3, 4], [2, 3, 0, 1, 2, 3, 4], [4, 5, 0, 1, 2, 3, 4]],
        [[6, 7, 5, 6, 7, 8, 9], [8, 9, 5, 6, 7, 8, 9], [10, 11, 5, 6, 7, 8, 9]]
    ], dtype=np.float32)
    output = self.evaluate(input_layer)
    self.assertAllEqual(expected, output)

  @parameterized.named_parameters(
      {'testcase_name': 'rank_lt_3',
       'seq_input_arg': np.arange(100).reshape(10, 10)},
      {'testcase_name': 'rank_gt_3',
       'seq_input_arg': np.arange(100).reshape(5, 5, 2, 2)}
      )
  def test_sequence_input_throws_error(self, seq_input_arg):
    seq_input = ops.convert_to_tensor(seq_input_arg)
    context_input = ops.convert_to_tensor(np.arange(100).reshape(10, 10))
    seq_input = math_ops.cast(seq_input, dtype=dtypes.float32)
    context_input = math_ops.cast(context_input, dtype=dtypes.float32)
    with self.assertRaisesRegex(ValueError, 'sequence_input must have rank 3'):
      sfc.concatenate_context_input(context_input, seq_input)

  @parameterized.named_parameters(
      {'testcase_name': 'rank_lt_2',
       'context_input_arg': np.arange(100)},
      {'testcase_name': 'rank_gt_2',
       'context_input_arg': np.arange(100).reshape(5, 5, 4)}
      )
  def test_context_input_throws_error(self, context_input_arg):
    context_input = ops.convert_to_tensor(context_input_arg)
    seq_input = ops.convert_to_tensor(np.arange(100).reshape(5, 5, 4))
    seq_input = math_ops.cast(seq_input, dtype=dtypes.float32)
    context_input = math_ops.cast(context_input, dtype=dtypes.float32)
    with self.assertRaisesRegex(ValueError, 'context_input must have rank 2'):
      sfc.concatenate_context_input(context_input, seq_input)

  def test_integer_seq_input_throws_error(self):
    seq_input = ops.convert_to_tensor(np.arange(100).reshape(5, 5, 4))
    context_input = ops.convert_to_tensor(np.arange(100).reshape(10, 10))
    context_input = math_ops.cast(context_input, dtype=dtypes.float32)
    with self.assertRaisesRegex(TypeError,
                                'sequence_input must have dtype float32'):
      sfc.concatenate_context_input(context_input, seq_input)

  def test_integer_context_input_throws_error(self):
    seq_input = ops.convert_to_tensor(np.arange(100).reshape(5, 5, 4))
    context_input = ops.convert_to_tensor(np.arange(100).reshape(10, 10))
    seq_input = math_ops.cast(seq_input, dtype=dtypes.float32)
    with self.assertRaisesRegex(TypeError,
                                'context_input must have dtype float32'):
      sfc.concatenate_context_input(context_input, seq_input)


def _assert_sparse_tensor_value(test_case, expected, actual):
  _assert_sparse_tensor_indices_shape(test_case, expected, actual)

  test_case.assertEqual(
      np.array(expected.values).dtype, np.array(actual.values).dtype)
  test_case.assertAllEqual(expected.values, actual.values)


def _assert_sparse_tensor_indices_shape(test_case, expected, actual):
  test_case.assertEqual(np.int64, np.array(actual.indices).dtype)
  test_case.assertAllEqual(expected.indices, actual.indices)

  test_case.assertEqual(np.int64, np.array(actual.dense_shape).dtype)
  test_case.assertAllEqual(expected.dense_shape, actual.dense_shape)


def _get_sequence_dense_tensor(column, features):
  return column.get_sequence_dense_tensor(
      fc.FeatureTransformationCache(features), None)


def _get_sequence_dense_tensor_state(column, features):
  state_manager = fc._StateManagerImpl(
      dense_features.DenseFeatures(column), trainable=True)
  column.create_state(state_manager)
  dense_tensor, lengths = column.get_sequence_dense_tensor(
      fc.FeatureTransformationCache(features), state_manager)
  return dense_tensor, lengths, state_manager


def _get_sparse_tensors(column, features):
  return column.get_sparse_tensors(
      fc.FeatureTransformationCache(features), None)


@test_util.run_all_in_graph_and_eager_modes
class SequenceCategoricalColumnWithIdentityTest(
    test.TestCase, parameterized.TestCase):

  @parameterized.named_parameters(
      {'testcase_name': '2D',
       'inputs_args': {
           'indices': ((0, 0), (1, 0), (1, 1)),
           'values': (1, 2, 0),
           'dense_shape': (2, 2)},
       'expected_args': {
           'indices': ((0, 0, 0), (1, 0, 0), (1, 1, 0)),
           'values': np.array((1, 2, 0), dtype=np.int64),
           'dense_shape': (2, 2, 1)}},
      {'testcase_name': '3D',
       'inputs_args': {
           'indices': ((0, 0, 2), (1, 0, 0), (1, 2, 0)),
           'values': (6, 7, 8),
           'dense_shape': (2, 2, 2)},
       'expected_args': {
           'indices': ((0, 0, 2), (1, 0, 0), (1, 2, 0)),
           'values': np.array((6, 7, 8), dtype=np.int64),
           'dense_shape': (2, 2, 2)}}
      )
  def test_get_sparse_tensors(self, inputs_args, expected_args):
    inputs = sparse_tensor.SparseTensorValue(**inputs_args)
    expected = sparse_tensor.SparseTensorValue(**expected_args)
    column = sfc.sequence_categorical_column_with_identity('aaa', num_buckets=9)

    id_weight_pair = _get_sparse_tensors(column, {'aaa': inputs})

    self.assertIsNone(id_weight_pair.weight_tensor)
    _assert_sparse_tensor_value(
        self, expected, self.evaluate(id_weight_pair.id_tensor))

  def test_serialization(self):
    """Tests that column can be serialized."""
    parent = sfc.sequence_categorical_column_with_identity(
        'animal', num_buckets=4)
    animal = fc.indicator_column(parent)

    config = animal.get_config()
    self.assertEqual(
        {
            'categorical_column': {
                'class_name': 'SequenceCategoricalColumn',
                'config': {
                    'categorical_column': {
                        'class_name': 'IdentityCategoricalColumn',
                        'config': {
                            'default_value': None,
                            'key': 'animal',
                            'number_buckets': 4
                        }
                    }
                }
            }
        }, config)

    new_animal = fc.IndicatorColumn.from_config(config)
    self.assertEqual(animal, new_animal)
    self.assertIsNot(parent, new_animal.categorical_column)

    new_animal = fc.IndicatorColumn.from_config(
        config,
        columns_by_name={
            serialization._column_name_with_class_name(parent): parent
        })
    self.assertEqual(animal, new_animal)
    self.assertIs(parent, new_animal.categorical_column)


@test_util.run_all_in_graph_and_eager_modes
class SequenceCategoricalColumnWithHashBucketTest(
    test.TestCase, parameterized.TestCase):

  @parameterized.named_parameters(
      {'testcase_name': '2D',
       'inputs_args': {
           'indices': ((0, 0), (1, 0), (1, 1)),
           'values': ('omar', 'stringer', 'marlo'),
           'dense_shape': (2, 2)},
       'expected_args': {
           'indices': ((0, 0, 0), (1, 0, 0), (1, 1, 0)),
           # Ignored to avoid hash dependence in test.
           'values': np.array((0, 0, 0), dtype=np.int64),
           'dense_shape': (2, 2, 1)}},
      {'testcase_name': '3D',
       'inputs_args': {
           'indices': ((0, 0, 2), (1, 0, 0), (1, 2, 0)),
           'values': ('omar', 'stringer', 'marlo'),
           'dense_shape': (2, 2, 2)},
       'expected_args': {
           'indices': ((0, 0, 2), (1, 0, 0), (1, 2, 0)),
           # Ignored to avoid hash dependence in test.
           'values': np.array((0, 0, 0), dtype=np.int64),
           'dense_shape': (2, 2, 2)}}
      )
  def test_get_sparse_tensors(self, inputs_args, expected_args):
    inputs = sparse_tensor.SparseTensorValue(**inputs_args)
    expected = sparse_tensor.SparseTensorValue(**expected_args)
    column = sfc.sequence_categorical_column_with_hash_bucket(
        'aaa', hash_bucket_size=10)

    id_weight_pair = _get_sparse_tensors(column, {'aaa': inputs})

    self.assertIsNone(id_weight_pair.weight_tensor)
    _assert_sparse_tensor_indices_shape(
        self, expected, self.evaluate(id_weight_pair.id_tensor))


@test_util.run_all_in_graph_and_eager_modes
class SequenceCategoricalColumnWithVocabularyFileTest(
    test.TestCase, parameterized.TestCase):

  def _write_vocab(self, vocab_strings, file_name):
    vocab_file = os.path.join(self.get_temp_dir(), file_name)
    with open(vocab_file, 'w') as f:
      f.write('\n'.join(vocab_strings))
    return vocab_file

  def setUp(self):
    super(SequenceCategoricalColumnWithVocabularyFileTest, self).setUp()

    vocab_strings = ['omar', 'stringer', 'marlo']
    self._wire_vocabulary_file_name = self._write_vocab(vocab_strings,
                                                        'wire_vocabulary.txt')
    self._wire_vocabulary_size = 3

  @parameterized.named_parameters(
      {'testcase_name': '2D',
       'inputs_args': {
           'indices': ((0, 0), (1, 0), (1, 1)),
           'values': ('marlo', 'skywalker', 'omar'),
           'dense_shape': (2, 2)},
       'expected_args': {
           'indices': ((0, 0, 0), (1, 0, 0), (1, 1, 0)),
           'values': np.array((2, -1, 0), dtype=np.int64),
           'dense_shape': (2, 2, 1)}},
      {'testcase_name': '3D',
       'inputs_args': {
           'indices': ((0, 0, 2), (1, 0, 0), (1, 2, 0)),
           'values': ('omar', 'skywalker', 'marlo'),
           'dense_shape': (2, 2, 2)},
       'expected_args': {
           'indices': ((0, 0, 2), (1, 0, 0), (1, 2, 0)),
           'values': np.array((0, -1, 2), dtype=np.int64),
           'dense_shape': (2, 2, 2)}}
      )
  def test_get_sparse_tensors(self, inputs_args, expected_args):
    inputs = sparse_tensor.SparseTensorValue(**inputs_args)
    expected = sparse_tensor.SparseTensorValue(**expected_args)
    column = sfc.sequence_categorical_column_with_vocabulary_file(
        key='aaa',
        vocabulary_file=self._wire_vocabulary_file_name,
        vocabulary_size=self._wire_vocabulary_size)

    id_weight_pair = _get_sparse_tensors(column, {'aaa': inputs})

    self.assertIsNone(id_weight_pair.weight_tensor)
    self.evaluate(variables_lib.global_variables_initializer())
    self.evaluate(lookup_ops.tables_initializer())
    _assert_sparse_tensor_value(
        self, expected, self.evaluate(id_weight_pair.id_tensor))

  def test_get_sparse_tensors_dynamic_zero_length(self):
    """Tests _get_sparse_tensors with a dynamic sequence length."""
    with ops.Graph().as_default():
      inputs = sparse_tensor.SparseTensorValue(
          indices=np.zeros((0, 2)), values=[], dense_shape=(2, 0))
      expected = sparse_tensor.SparseTensorValue(
          indices=np.zeros((0, 3)),
          values=np.array((), dtype=np.int64),
          dense_shape=(2, 0, 1))
      column = sfc.sequence_categorical_column_with_vocabulary_file(
          key='aaa',
          vocabulary_file=self._wire_vocabulary_file_name,
          vocabulary_size=self._wire_vocabulary_size)
      input_placeholder_shape = list(inputs.dense_shape)
      # Make second dimension (sequence length) dynamic.
      input_placeholder_shape[1] = None
      input_placeholder = array_ops.sparse_placeholder(
          dtypes.string, shape=input_placeholder_shape)
      id_weight_pair = _get_sparse_tensors(column, {'aaa': input_placeholder})

      self.assertIsNone(id_weight_pair.weight_tensor)
      with _initialized_session() as sess:
        result = id_weight_pair.id_tensor.eval(
            session=sess, feed_dict={input_placeholder: inputs})
        _assert_sparse_tensor_value(
            self, expected, result)


@test_util.run_all_in_graph_and_eager_modes
class SequenceCategoricalColumnWithVocabularyListTest(
    test.TestCase, parameterized.TestCase):

  @parameterized.named_parameters(
      {'testcase_name': '2D',
       'inputs_args': {
           'indices': ((0, 0), (1, 0), (1, 1)),
           'values': ('marlo', 'skywalker', 'omar'),
           'dense_shape': (2, 2)},
       'expected_args': {
           'indices': ((0, 0, 0), (1, 0, 0), (1, 1, 0)),
           'values': np.array((2, -1, 0), dtype=np.int64),
           'dense_shape': (2, 2, 1)}},
      {'testcase_name': '3D',
       'inputs_args': {
           'indices': ((0, 0, 2), (1, 0, 0), (1, 2, 0)),
           'values': ('omar', 'skywalker', 'marlo'),
           'dense_shape': (2, 2, 2)},
       'expected_args': {
           'indices': ((0, 0, 2), (1, 0, 0), (1, 2, 0)),
           'values': np.array((0, -1, 2), dtype=np.int64),
           'dense_shape': (2, 2, 2)}}
      )
  def test_get_sparse_tensors(self, inputs_args, expected_args):
    inputs = sparse_tensor.SparseTensorValue(**inputs_args)
    expected = sparse_tensor.SparseTensorValue(**expected_args)
    column = sfc.sequence_categorical_column_with_vocabulary_list(
        key='aaa',
        vocabulary_list=('omar', 'stringer', 'marlo'))

    id_weight_pair = _get_sparse_tensors(column, {'aaa': inputs})

    self.assertIsNone(id_weight_pair.weight_tensor)
    self.evaluate(variables_lib.global_variables_initializer())
    self.evaluate(lookup_ops.tables_initializer())
    _assert_sparse_tensor_value(
        self, expected, self.evaluate(id_weight_pair.id_tensor))


@test_util.run_all_in_graph_and_eager_modes
class SequenceEmbeddingColumnTest(
    test.TestCase, parameterized.TestCase):

  @parameterized.named_parameters(
      {'testcase_name': '2D',
       'inputs_args': {
           # example 0, ids [2]
           # example 1, ids [0, 1]
           # example 2, ids []
           # example 3, ids [1]
           'indices': ((0, 0), (1, 0), (1, 1), (3, 0)),
           'values': (2, 0, 1, 1),
           'dense_shape': (4, 2)},
       'expected': [
           # example 0, ids [2]
           [[7., 11.], [0., 0.]],
           # example 1, ids [0, 1]
           [[1., 2.], [3., 5.]],
           # example 2, ids []
           [[0., 0.], [0., 0.]],
           # example 3, ids [1]
           [[3., 5.], [0., 0.]]]},
      {'testcase_name': '3D',
       'inputs_args': {
           # example 0, ids [[2]]
           # example 1, ids [[0, 1], [2]]
           # example 2, ids []
           # example 3, ids [[1], [0, 2]]
           'indices': ((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0),
                       (3, 0, 0), (3, 1, 0), (3, 1, 1)),
           'values': (2, 0, 1, 2, 1, 0, 2),
           'dense_shape': (4, 2, 2)},
       'expected': [
           # example 0, ids [[2]]
           [[7., 11.], [0., 0.]],
           # example 1, ids [[0, 1], [2]]
           [[2, 3.5], [7., 11.]],
           # example 2, ids []
           [[0., 0.], [0., 0.]],
           # example 3, ids [[1], [0, 2]]
           [[3., 5.], [4., 6.5]]]}
      )
  def test_get_sequence_dense_tensor(self, inputs_args, expected):
    inputs = sparse_tensor.SparseTensorValue(**inputs_args)
    vocabulary_size = 3
    embedding_dimension = 2
    embedding_values = (
        (1., 2.),  # id 0
        (3., 5.),  # id 1
        (7., 11.)  # id 2
    )

    def _initializer(shape, dtype, partition_info=None):
      self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
      self.assertEqual(dtypes.float32, dtype)
      self.assertIsNone(partition_info)
      return embedding_values

    categorical_column = sfc.sequence_categorical_column_with_identity(
        key='aaa', num_buckets=vocabulary_size)
    embedding_column = fc.embedding_column(
        categorical_column, dimension=embedding_dimension,
        initializer=_initializer)

    embedding_lookup, _, state_manager = _get_sequence_dense_tensor_state(
        embedding_column, {'aaa': inputs})

    variables = state_manager._layer.weights
    self.evaluate(variables_lib.global_variables_initializer())
    self.assertCountEqual(
        ('embedding_weights:0',), tuple([v.name for v in variables]))
    self.assertAllEqual(embedding_values, self.evaluate(variables[0]))
    self.assertAllEqual(expected, self.evaluate(embedding_lookup))

  @parameterized.named_parameters(
      {'testcase_name': '2D',
       'inputs_args': {
           # example 0, ids [2]
           # example 1, ids [0, 1]
           'indices': ((0, 0), (1, 0), (1, 1)),
           'values': (2, 0, 1),
           'dense_shape': (2, 2)},
       'expected_sequence_length': [1, 2]},
      {'testcase_name': '3D',
       'inputs_args': {
           # example 0, ids [[2]]
           # example 1, ids [[0, 1], [2]]
           'indices': ((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0)),
           'values': (2, 0, 1, 2),
           'dense_shape': (2, 2, 2)},
       'expected_sequence_length': [1, 2]}
      )
  def test_sequence_length(self, inputs_args, expected_sequence_length):
    inputs = sparse_tensor.SparseTensorValue(**inputs_args)
    vocabulary_size = 3

    categorical_column = sfc.sequence_categorical_column_with_identity(
        key='aaa', num_buckets=vocabulary_size)
    embedding_column = fc.embedding_column(
        categorical_column, dimension=2)

    _, sequence_length, _ = _get_sequence_dense_tensor_state(
        embedding_column, {'aaa': inputs})

    sequence_length = self.evaluate(sequence_length)
    self.assertAllEqual(expected_sequence_length, sequence_length)
    self.assertEqual(np.int64, sequence_length.dtype)

  def test_sequence_length_with_empty_rows(self):
    """Tests _sequence_length when some examples do not have ids."""
    vocabulary_size = 3
    sparse_input = sparse_tensor.SparseTensorValue(
        # example 0, ids []
        # example 1, ids [2]
        # example 2, ids [0, 1]
        # example 3, ids []
        # example 4, ids [1]
        # example 5, ids []
        indices=((1, 0), (2, 0), (2, 1), (4, 0)),
        values=(2, 0, 1, 1),
        dense_shape=(6, 2))
    expected_sequence_length = [0, 1, 2, 0, 1, 0]

    categorical_column = sfc.sequence_categorical_column_with_identity(
        key='aaa', num_buckets=vocabulary_size)
    embedding_column = fc.embedding_column(
        categorical_column, dimension=2)

    _, sequence_length, _ = _get_sequence_dense_tensor_state(
        embedding_column, {'aaa': sparse_input})

    self.assertAllEqual(
        expected_sequence_length, self.evaluate(sequence_length))


class SequenceSharedEmbeddingColumnTest(test.TestCase):

  def test_get_sequence_dense_tensor(self):
    vocabulary_size = 3
    embedding_dimension = 2
    embedding_values = (
        (1., 2.),  # id 0
        (3., 5.),  # id 1
        (7., 11.)  # id 2
    )

    def _initializer(shape, dtype, partition_info=None):
      self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
      self.assertEqual(dtypes.float32, dtype)
      self.assertIsNone(partition_info)
      return embedding_values

    with ops.Graph().as_default():
      sparse_input_a = sparse_tensor.SparseTensorValue(
          # example 0, ids [2]
          # example 1, ids [0, 1]
          # example 2, ids []
          # example 3, ids [1]
          indices=((0, 0), (1, 0), (1, 1), (3, 0)),
          values=(2, 0, 1, 1),
          dense_shape=(4, 2))
      sparse_input_b = sparse_tensor.SparseTensorValue(
          # example 0, ids [1]
          # example 1, ids [0, 2]
          # example 2, ids [0]
          # example 3, ids []
          indices=((0, 0), (1, 0), (1, 1), (2, 0)),
          values=(1, 0, 2, 0),
          dense_shape=(4, 2))

      expected_lookups_a = [
          # example 0, ids [2]
          [[7., 11.], [0., 0.]],
          # example 1, ids [0, 1]
          [[1., 2.], [3., 5.]],
          # example 2, ids []
          [[0., 0.], [0., 0.]],
          # example 3, ids [1]
          [[3., 5.], [0., 0.]],
      ]

      expected_lookups_b = [
          # example 0, ids [1]
          [[3., 5.], [0., 0.]],
          # example 1, ids [0, 2]
          [[1., 2.], [7., 11.]],
          # example 2, ids [0]
          [[1., 2.], [0., 0.]],
          # example 3, ids []
          [[0., 0.], [0., 0.]],
      ]

      categorical_column_a = sfc.sequence_categorical_column_with_identity(
          key='aaa', num_buckets=vocabulary_size)
      categorical_column_b = sfc.sequence_categorical_column_with_identity(
          key='bbb', num_buckets=vocabulary_size)
      shared_embedding_columns = fc.shared_embedding_columns_v2(
          [categorical_column_a, categorical_column_b],
          dimension=embedding_dimension,
          initializer=_initializer)

      embedding_lookup_a = _get_sequence_dense_tensor(
          shared_embedding_columns[0], {'aaa': sparse_input_a})[0]
      embedding_lookup_b = _get_sequence_dense_tensor(
          shared_embedding_columns[1], {'bbb': sparse_input_b})[0]

      self.evaluate(variables_lib.global_variables_initializer())
      global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
      self.assertCountEqual(('aaa_bbb_shared_embedding:0',),
                            tuple([v.name for v in global_vars]))
      self.assertAllEqual(embedding_values, self.evaluate(global_vars[0]))
      self.assertAllEqual(
          expected_lookups_a, self.evaluate(embedding_lookup_a))
      self.assertAllEqual(expected_lookups_b, self.evaluate(embedding_lookup_b))

  def test_sequence_length(self):
    with ops.Graph().as_default():
      vocabulary_size = 3

      sparse_input_a = sparse_tensor.SparseTensorValue(
          # example 0, ids [2]
          # example 1, ids [0, 1]
          indices=((0, 0), (1, 0), (1, 1)),
          values=(2, 0, 1),
          dense_shape=(2, 2))
      expected_sequence_length_a = [1, 2]
      categorical_column_a = sfc.sequence_categorical_column_with_identity(
          key='aaa', num_buckets=vocabulary_size)

      sparse_input_b = sparse_tensor.SparseTensorValue(
          # example 0, ids [0, 2]
          # example 1, ids [1]
          indices=((0, 0), (0, 1), (1, 0)),
          values=(0, 2, 1),
          dense_shape=(2, 2))
      expected_sequence_length_b = [2, 1]
      categorical_column_b = sfc.sequence_categorical_column_with_identity(
          key='bbb', num_buckets=vocabulary_size)
      shared_embedding_columns = fc.shared_embedding_columns_v2(
          [categorical_column_a, categorical_column_b], dimension=2)

      sequence_length_a = _get_sequence_dense_tensor(
          shared_embedding_columns[0], {'aaa': sparse_input_a})[1]
      sequence_length_b = _get_sequence_dense_tensor(
          shared_embedding_columns[1], {'bbb': sparse_input_b})[1]

      with _initialized_session() as sess:
        sequence_length_a = sess.run(sequence_length_a)
        self.assertAllEqual(expected_sequence_length_a, sequence_length_a)
        self.assertEqual(np.int64, sequence_length_a.dtype)
        sequence_length_b = sess.run(sequence_length_b)
        self.assertAllEqual(expected_sequence_length_b, sequence_length_b)
        self.assertEqual(np.int64, sequence_length_b.dtype)

  def test_sequence_length_with_empty_rows(self):
    """Tests _sequence_length when some examples do not have ids."""
    with ops.Graph().as_default():
      vocabulary_size = 3
      sparse_input_a = sparse_tensor.SparseTensorValue(
          # example 0, ids []
          # example 1, ids [2]
          # example 2, ids [0, 1]
          # example 3, ids []
          # example 4, ids [1]
          # example 5, ids []
          indices=((1, 0), (2, 0), (2, 1), (4, 0)),
          values=(2, 0, 1, 1),
          dense_shape=(6, 2))
      expected_sequence_length_a = [0, 1, 2, 0, 1, 0]
      categorical_column_a = sfc.sequence_categorical_column_with_identity(
          key='aaa', num_buckets=vocabulary_size)

      sparse_input_b = sparse_tensor.SparseTensorValue(
          # example 0, ids [2]
          # example 1, ids []
          # example 2, ids []
          # example 3, ids []
          # example 4, ids [1]
          # example 5, ids [0, 1]
          indices=((0, 0), (4, 0), (5, 0), (5, 1)),
          values=(2, 1, 0, 1),
          dense_shape=(6, 2))
      expected_sequence_length_b = [1, 0, 0, 0, 1, 2]
      categorical_column_b = sfc.sequence_categorical_column_with_identity(
          key='bbb', num_buckets=vocabulary_size)

      shared_embedding_columns = fc.shared_embedding_columns_v2(
          [categorical_column_a, categorical_column_b], dimension=2)

      sequence_length_a = _get_sequence_dense_tensor(
          shared_embedding_columns[0], {'aaa': sparse_input_a})[1]
      sequence_length_b = _get_sequence_dense_tensor(
          shared_embedding_columns[1], {'bbb': sparse_input_b})[1]

      with _initialized_session() as sess:
        self.assertAllEqual(
            expected_sequence_length_a, sequence_length_a.eval(session=sess))
        self.assertAllEqual(
            expected_sequence_length_b, sequence_length_b.eval(session=sess))


@test_util.run_all_in_graph_and_eager_modes
class SequenceIndicatorColumnTest(test.TestCase, parameterized.TestCase):

  @parameterized.named_parameters(
      {'testcase_name': '2D',
       'inputs_args': {
           # example 0, ids [2]
           # example 1, ids [0, 1]
           # example 2, ids []
           # example 3, ids [1]
           'indices': ((0, 0), (1, 0), (1, 1), (3, 0)),
           'values': (2, 0, 1, 1),
           'dense_shape': (4, 2)},
       'expected': [
           # example 0, ids [2]
           [[0., 0., 1.], [0., 0., 0.]],
           # example 1, ids [0, 1]
           [[1., 0., 0.], [0., 1., 0.]],
           # example 2, ids []
           [[0., 0., 0.], [0., 0., 0.]],
           # example 3, ids [1]
           [[0., 1., 0.], [0., 0., 0.]]]},
      {'testcase_name': '3D',
       'inputs_args': {
           # example 0, ids [[2]]
           # example 1, ids [[0, 1], [2]]
           # example 2, ids []
           # example 3, ids [[1], [2, 2]]
           'indices': ((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0),
                       (3, 0, 0), (3, 1, 0), (3, 1, 1)),
           'values': (2, 0, 1, 2, 1, 2, 2),
           'dense_shape': (4, 2, 2)},
       'expected': [
           # example 0, ids [[2]]
           [[0., 0., 1.], [0., 0., 0.]],
           # example 1, ids [[0, 1], [2]]
           [[1., 1., 0.], [0., 0., 1.]],
           # example 2, ids []
           [[0., 0., 0.], [0., 0., 0.]],
           # example 3, ids [[1], [2, 2]]
           [[0., 1., 0.], [0., 0., 2.]]]}
      )
  def test_get_sequence_dense_tensor(self, inputs_args, expected):
    inputs = sparse_tensor.SparseTensorValue(**inputs_args)
    vocabulary_size = 3

    categorical_column = sfc.sequence_categorical_column_with_identity(
        key='aaa', num_buckets=vocabulary_size)
    indicator_column = fc.indicator_column(categorical_column)

    indicator_tensor, _ = _get_sequence_dense_tensor(
        indicator_column, {'aaa': inputs})

    self.assertAllEqual(expected, self.evaluate(indicator_tensor))

  @parameterized.named_parameters(
      {'testcase_name': '2D',
       'inputs_args': {
           # example 0, ids [2]
           # example 1, ids [0, 1]
           'indices': ((0, 0), (1, 0), (1, 1)),
           'values': (2, 0, 1),
           'dense_shape': (2, 2)},
       'expected_sequence_length': [1, 2]},
      {'testcase_name': '3D',
       'inputs_args': {
           # example 0, ids [[2]]
           # example 1, ids [[0, 1], [2]]
           'indices': ((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0)),
           'values': (2, 0, 1, 2),
           'dense_shape': (2, 2, 2)},
       'expected_sequence_length': [1, 2]}
      )
  def test_sequence_length(self, inputs_args, expected_sequence_length):
    inputs = sparse_tensor.SparseTensorValue(**inputs_args)
    vocabulary_size = 3

    categorical_column = sfc.sequence_categorical_column_with_identity(
        key='aaa', num_buckets=vocabulary_size)
    indicator_column = fc.indicator_column(categorical_column)

    _, sequence_length = _get_sequence_dense_tensor(
        indicator_column, {'aaa': inputs})

    sequence_length = self.evaluate(sequence_length)
    self.assertAllEqual(expected_sequence_length, sequence_length)
    self.assertEqual(np.int64, sequence_length.dtype)

  def test_sequence_length_with_empty_rows(self):
    """Tests _sequence_length when some examples do not have ids."""
    vocabulary_size = 3
    sparse_input = sparse_tensor.SparseTensorValue(
        # example 0, ids []
        # example 1, ids [2]
        # example 2, ids [0, 1]
        # example 3, ids []
        # example 4, ids [1]
        # example 5, ids []
        indices=((1, 0), (2, 0), (2, 1), (4, 0)),
        values=(2, 0, 1, 1),
        dense_shape=(6, 2))
    expected_sequence_length = [0, 1, 2, 0, 1, 0]

    categorical_column = sfc.sequence_categorical_column_with_identity(
        key='aaa', num_buckets=vocabulary_size)
    indicator_column = fc.indicator_column(categorical_column)

    _, sequence_length = _get_sequence_dense_tensor(
        indicator_column, {'aaa': sparse_input})

    self.assertAllEqual(
        expected_sequence_length, self.evaluate(sequence_length))


@test_util.run_all_in_graph_and_eager_modes
class SequenceNumericColumnTest(test.TestCase, parameterized.TestCase):

  def test_defaults(self):
    a = sfc.sequence_numeric_column('aaa')
    self.assertEqual('aaa', a.key)
    self.assertEqual('aaa', a.name)
    self.assertEqual((1,), a.shape)
    self.assertEqual(0., a.default_value)
    self.assertEqual(dtypes.float32, a.dtype)
    self.assertIsNone(a.normalizer_fn)

  def test_shape_saved_as_tuple(self):
    a = sfc.sequence_numeric_column('aaa', shape=[1, 2])
    self.assertEqual((1, 2), a.shape)

  def test_shape_must_be_positive_integer(self):
    with self.assertRaisesRegex(TypeError, 'shape dimensions must be integer'):
      sfc.sequence_numeric_column('aaa', shape=[1.0])

    with self.assertRaisesRegex(ValueError,
                                'shape dimensions must be greater than 0'):
      sfc.sequence_numeric_column('aaa', shape=[0])

  def test_dtype_is_convertible_to_float(self):
    with self.assertRaisesRegex(ValueError,
                                'dtype must be convertible to float'):
      sfc.sequence_numeric_column('aaa', dtype=dtypes.string)

  def test_normalizer_fn_must_be_callable(self):
    with self.assertRaisesRegex(TypeError, 'must be a callable'):
      sfc.sequence_numeric_column('aaa', normalizer_fn='NotACallable')

  @parameterized.named_parameters(
      {'testcase_name': '2D',
       'inputs_args': {
           # example 0, values [0., 1.]
           # example 1, values [10.]
           'indices': ((0, 0), (0, 1), (1, 0)),
           'values': (0., 1., 10.),
           'dense_shape': (2, 2)},
       'expected': [
           [[0.], [1.]],
           [[10.], [0.]]]},
      {'testcase_name': '3D',
       'inputs_args': {
           # feature 0, values [[20, 3], [5]]
           # feature 1, values [[3], [8]]
           'indices': ((0, 0, 0), (0, 0, 1), (0, 1, 0), (1, 0, 0), (1, 1, 0)),
           'values': (20, 3, 5., 3., 8.),
           'dense_shape': (2, 2, 2)},
       'expected': [
           [[20.], [3.], [5.], [0.]],
           [[3.], [0.], [8.], [0.]]]},
      )
  def test_get_sequence_dense_tensor(self, inputs_args, expected):
    inputs = sparse_tensor.SparseTensorValue(**inputs_args)
    numeric_column = sfc.sequence_numeric_column('aaa')

    dense_tensor, _ = _get_sequence_dense_tensor(
        numeric_column, {'aaa': inputs})
    self.assertAllEqual(expected, self.evaluate(dense_tensor))

  def test_get_sequence_dense_tensor_with_normalizer_fn(self):

    def _increment_two(input_sparse_tensor):
      return sparse_ops.sparse_add(
          input_sparse_tensor,
          sparse_tensor.SparseTensor(((0, 0), (1, 1)), (2.0, 2.0), (2, 2))
      )

    sparse_input = sparse_tensor.SparseTensorValue(
        # example 0, values [[0.], [1.]]
        # example 1, values [[10.]]
        indices=((0, 0), (0, 1), (1, 0)),
        values=(0., 1., 10.),
        dense_shape=(2, 2))

    # Before _increment_two:
    #   [[0.], [1.]],
    #   [[10.], [0.]],
    # After _increment_two:
    #   [[2.], [1.]],
    #   [[10.], [2.]],
    expected_dense_tensor = [
        [[2.], [1.]],
        [[10.], [2.]],
    ]
    numeric_column = sfc.sequence_numeric_column(
        'aaa', normalizer_fn=_increment_two)

    dense_tensor, _ = _get_sequence_dense_tensor(
        numeric_column, {'aaa': sparse_input})

    self.assertAllEqual(
        expected_dense_tensor, self.evaluate(dense_tensor))

  @parameterized.named_parameters(
      {'testcase_name': '2D',
       'sparse_input_args': {
           # example 0, values [[[0., 1.], [2., 3.]], [[4., 5.], [6., 7.]]]
           # example 1, values [[[10., 11.], [12., 13.]]]
           'indices': ((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6),
                       (0, 7), (1, 0), (1, 1), (1, 2), (1, 3)),
           'values': (0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.),
           'dense_shape': (2, 8)},
       'expected_dense_tensor': [
           [[[0., 1.], [2., 3.]], [[4., 5.], [6., 7.]]],
           [[[10., 11.], [12., 13.]], [[0., 0.], [0., 0.]]]]},
      {'testcase_name': '3D',
       'sparse_input_args': {
           'indices': ((0, 0, 0), (0, 0, 2), (0, 0, 4), (0, 0, 6),
                       (0, 1, 0), (0, 1, 2), (0, 1, 4), (0, 1, 6),
                       (1, 0, 0), (1, 0, 2), (1, 0, 4), (1, 0, 6)),
           'values': (0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.),
           'dense_shape': (2, 2, 8)},
       'expected_dense_tensor': [
           [[[0., 0.], [1., 0.]], [[2., 0.], [3., 0.]],
            [[4., 0.], [5., 0.]], [[6., 0.], [7., 0.]]],
           [[[10., 0.], [11., 0.]], [[12., 0.], [13., 0.]],
            [[0., 0.], [0., 0.]], [[0., 0.], [0., 0.]]]]},
      )
  def test_get_dense_tensor_multi_dim(
      self, sparse_input_args, expected_dense_tensor):
    """Tests get_sequence_dense_tensor for multi-dim numeric_column."""
    sparse_input = sparse_tensor.SparseTensorValue(**sparse_input_args)
    numeric_column = sfc.sequence_numeric_column('aaa', shape=(2, 2))

    dense_tensor, _ = _get_sequence_dense_tensor(
        numeric_column, {'aaa': sparse_input})

    self.assertAllEqual(
        expected_dense_tensor, self.evaluate(dense_tensor))

  @parameterized.named_parameters(
      {'testcase_name': '2D',
       'inputs_args': {
           # example 0, values [2]
           # example 1, values [0, 1]
           'indices': ((0, 0), (1, 0), (1, 1)),
           'values': (2., 0., 1.),
           'dense_shape': (2, 2)},
       'expected_sequence_length': [1, 2],
       'shape': (1,)},
      {'testcase_name': '3D',
       'inputs_args': {
           # example 0, values [[2]]
           # example 1, values [[0, 1], [2]]
           'indices': ((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0)),
           'values': (2., 0., 1., 2.),
           'dense_shape': (2, 2, 2)},
       'expected_sequence_length': [1, 2],
       'shape': (1,)},
      {'testcase_name': '2D_with_shape',
       'inputs_args': {
           # example 0, values [2]
           # example 1, values [0, 1]
           'indices': ((0, 0), (1, 0), (1, 1)),
           'values': (2., 0., 1.),
           'dense_shape': (2, 2)},
       'expected_sequence_length': [1, 1],
       'shape': (2,)},
      {'testcase_name': '3D_with_shape',
       'inputs_args': {
           # example 0, values [[2]]
           # example 1, values [[0, 1], [2]]
           'indices': ((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0)),
           'values': (2., 0., 1., 2.),
           'dense_shape': (2, 2, 2)},
       'expected_sequence_length': [1, 2],
       'shape': (2,)},
      )
  def test_sequence_length(self, inputs_args, expected_sequence_length, shape):
    inputs = sparse_tensor.SparseTensorValue(**inputs_args)
    numeric_column = sfc.sequence_numeric_column('aaa', shape=shape)

    _, sequence_length = _get_sequence_dense_tensor(
        numeric_column, {'aaa': inputs})

    sequence_length = self.evaluate(sequence_length)
    self.assertAllEqual(expected_sequence_length, sequence_length)
    self.assertEqual(np.int64, sequence_length.dtype)

  def test_sequence_length_with_empty_rows(self):
    """Tests _sequence_length when some examples do not have values."""
    sparse_input = sparse_tensor.SparseTensorValue(
        # example 0, values []
        # example 1, values [[0.], [1.]]
        # example 2, values [[2.]]
        # example 3, values []
        # example 4, values [[3.]]
        # example 5, values []
        indices=((1, 0), (1, 1), (2, 0), (4, 0)),
        values=(0., 1., 2., 3.),
        dense_shape=(6, 2))
    expected_sequence_length = [0, 2, 1, 0, 1, 0]
    numeric_column = sfc.sequence_numeric_column('aaa')

    _, sequence_length = _get_sequence_dense_tensor(
        numeric_column, {'aaa': sparse_input})

    self.assertAllEqual(
        expected_sequence_length, self.evaluate(sequence_length))

  def test_serialization(self):
    """Tests that column can be serialized."""
    def _custom_fn(input_tensor):
      return input_tensor + 42

    column = sfc.sequence_numeric_column(
        key='my-key', shape=(2,), default_value=3, dtype=dtypes.int32,
        normalizer_fn=_custom_fn)
    configs = serialization.serialize_feature_column(column)
    column = serialization.deserialize_feature_column(
        configs, custom_objects={_custom_fn.__name__: _custom_fn})
    self.assertEqual(column.key, 'my-key')
    self.assertEqual(column.shape, (2,))
    self.assertEqual(column.default_value, 3)
    self.assertEqual(column.normalizer_fn(3), 45)
    with self.assertRaisesRegex(ValueError,
                                'Instance: 0 is not a FeatureColumn'):
      serialization.serialize_feature_column(int())

  def test_parents(self):
    """Tests parents attribute of column."""
    column = sfc.sequence_numeric_column(key='my-key')
    self.assertEqual(column.parents, ['my-key'])


if __name__ == '__main__':
  test.main()