xref: /aosp_15_r20/external/tensorflow/tensorflow/python/feature_column/sequence_feature_column_test.py (revision b6fb3261f9314811a0f4371741dbb8839866f948)
1# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# ==============================================================================
15"""Tests for sequential_feature_column."""
16
17import os
18
19from absl.testing import parameterized
# This should be removed in the future, since it uses a Keras component in a unit test.
21from keras.feature_column import dense_features
22import numpy as np
23
24from tensorflow.python.client import session
25from tensorflow.python.feature_column import feature_column_v2 as fc
26from tensorflow.python.feature_column import sequence_feature_column as sfc
27from tensorflow.python.feature_column import serialization
28from tensorflow.python.framework import dtypes
29from tensorflow.python.framework import ops
30from tensorflow.python.framework import sparse_tensor
31from tensorflow.python.framework import test_util
32from tensorflow.python.ops import array_ops
33from tensorflow.python.ops import lookup_ops
34from tensorflow.python.ops import math_ops
35from tensorflow.python.ops import sparse_ops
36from tensorflow.python.ops import variables as variables_lib
37from tensorflow.python.platform import test
38
39
def _initialized_session(config=None):
  """Opens a `Session` and runs the variable and table initializers in it.

  Args:
    config: Optional session configuration, forwarded to `session.Session`.

  Returns:
    The opened session. It is not closed here; callers in this file use it
    as a context manager.
  """
  initialized = session.Session(config=config)
  # Global variables are initialized first, lookup tables second; each
  # initializer op is created right before it is run.
  for make_init_op in (variables_lib.global_variables_initializer,
                       lookup_ops.tables_initializer):
    initialized.run(make_init_op())
  return initialized
45
46
@test_util.run_all_in_graph_and_eager_modes
class ConcatenateContextInputTest(test.TestCase, parameterized.TestCase):
  """Exercises `sfc.concatenate_context_input` on valid and invalid inputs."""

  def test_concatenate_context_input(self):
    """Float32 inputs of rank 3 (sequence) and rank 2 (context) concatenate."""
    seq_input = math_ops.cast(
        ops.convert_to_tensor(np.arange(12).reshape(2, 3, 2)),
        dtype=dtypes.float32)
    context_input = math_ops.cast(
        ops.convert_to_tensor(np.arange(10).reshape(2, 5)),
        dtype=dtypes.float32)

    input_layer = sfc.concatenate_context_input(context_input, seq_input)

    # Each row holds the two sequence values followed by the five context
    # values of the same example.
    expected = np.array(
        [
            [[0, 1, 0, 1, 2, 3, 4],
             [2, 3, 0, 1, 2, 3, 4],
             [4, 5, 0, 1, 2, 3, 4]],
            [[6, 7, 5, 6, 7, 8, 9],
             [8, 9, 5, 6, 7, 8, 9],
             [10, 11, 5, 6, 7, 8, 9]],
        ],
        dtype=np.float32)
    self.assertAllEqual(expected, self.evaluate(input_layer))

  @parameterized.named_parameters(
      dict(testcase_name='rank_lt_3',
           seq_input_arg=np.arange(100).reshape(10, 10)),
      dict(testcase_name='rank_gt_3',
           seq_input_arg=np.arange(100).reshape(5, 5, 2, 2)))
  def test_sequence_input_throws_error(self, seq_input_arg):
    """A sequence input whose rank is not 3 raises `ValueError`."""
    seq_input = math_ops.cast(
        ops.convert_to_tensor(seq_input_arg), dtype=dtypes.float32)
    context_input = math_ops.cast(
        ops.convert_to_tensor(np.arange(100).reshape(10, 10)),
        dtype=dtypes.float32)

    with self.assertRaisesRegex(ValueError, 'sequence_input must have rank 3'):
      sfc.concatenate_context_input(context_input, seq_input)

  @parameterized.named_parameters(
      dict(testcase_name='rank_lt_2',
           context_input_arg=np.arange(100)),
      dict(testcase_name='rank_gt_2',
           context_input_arg=np.arange(100).reshape(5, 5, 4)))
  def test_context_input_throws_error(self, context_input_arg):
    """A context input whose rank is not 2 raises `ValueError`."""
    context_input = math_ops.cast(
        ops.convert_to_tensor(context_input_arg), dtype=dtypes.float32)
    seq_input = math_ops.cast(
        ops.convert_to_tensor(np.arange(100).reshape(5, 5, 4)),
        dtype=dtypes.float32)

    with self.assertRaisesRegex(ValueError, 'context_input must have rank 2'):
      sfc.concatenate_context_input(context_input, seq_input)

  def test_integer_seq_input_throws_error(self):
    """A sequence input left as an integer tensor raises `TypeError`."""
    # Only the context input is cast to float32 here.
    seq_input = ops.convert_to_tensor(np.arange(100).reshape(5, 5, 4))
    context_input = math_ops.cast(
        ops.convert_to_tensor(np.arange(100).reshape(10, 10)),
        dtype=dtypes.float32)

    with self.assertRaisesRegex(
        TypeError, 'sequence_input must have dtype float32'):
      sfc.concatenate_context_input(context_input, seq_input)

  def test_integer_context_input_throws_error(self):
    """A context input left as an integer tensor raises `TypeError`."""
    # Only the sequence input is cast to float32 here.
    context_input = ops.convert_to_tensor(np.arange(100).reshape(10, 10))
    seq_input = math_ops.cast(
        ops.convert_to_tensor(np.arange(100).reshape(5, 5, 4)),
        dtype=dtypes.float32)

    with self.assertRaisesRegex(
        TypeError, 'context_input must have dtype float32'):
      sfc.concatenate_context_input(context_input, seq_input)
108
109
def _assert_sparse_tensor_value(test_case, expected, actual):
  """Asserts that two sparse tensor values match in every component.

  Indices and dense shape are checked first, then the dtype of the values
  and finally the values themselves.

  Args:
    test_case: Object providing `assertEqual` and `assertAllEqual`.
    expected: Object with `indices`, `values` and `dense_shape` attributes.
    actual: Object with the same attributes, compared against `expected`.
  """
  _assert_sparse_tensor_indices_shape(test_case, expected, actual)

  expected_dtype = np.array(expected.values).dtype
  actual_dtype = np.array(actual.values).dtype
  test_case.assertEqual(expected_dtype, actual_dtype)
  test_case.assertAllEqual(expected.values, actual.values)
116
117
def _assert_sparse_tensor_indices_shape(test_case, expected, actual):
  """Asserts that indices and dense shape match, ignoring the values.

  For `indices` first and `dense_shape` second, the actual component must
  have dtype int64 and be equal to the expected component.

  Args:
    test_case: Object providing `assertEqual` and `assertAllEqual`.
    expected: Object with `indices` and `dense_shape` attributes.
    actual: Object with the same attributes, compared against `expected`.
  """
  for component in ('indices', 'dense_shape'):
    actual_component = getattr(actual, component)
    test_case.assertEqual(np.int64, np.array(actual_component).dtype)
    test_case.assertAllEqual(getattr(expected, component), actual_component)
124
125
def _get_sequence_dense_tensor(column, features):
  """Calls `column.get_sequence_dense_tensor` without a state manager.

  Args:
    column: Column object exposing `get_sequence_dense_tensor`.
    features: Feature mapping wrapped in a `FeatureTransformationCache`.

  Returns:
    Whatever `column.get_sequence_dense_tensor` returns.
  """
  transformation_cache = fc.FeatureTransformationCache(features)
  return column.get_sequence_dense_tensor(transformation_cache, None)
129
130
def _get_sequence_dense_tensor_state(column, features):
  """Calls `column.get_sequence_dense_tensor` with a fresh state manager.

  The state manager wraps a `DenseFeatures` layer built from `column`, and
  the column's state is created on it before the dense tensor is requested.

  Args:
    column: Column object exposing `create_state` and
      `get_sequence_dense_tensor`.
    features: Feature mapping wrapped in a `FeatureTransformationCache`.

  Returns:
    A `(dense_tensor, lengths, state_manager)` tuple.
  """
  layer = dense_features.DenseFeatures(column)
  state_manager = fc._StateManagerImpl(layer, trainable=True)
  column.create_state(state_manager)

  transformation_cache = fc.FeatureTransformationCache(features)
  dense_tensor, lengths = column.get_sequence_dense_tensor(
      transformation_cache, state_manager)
  return dense_tensor, lengths, state_manager
138
139
def _get_sparse_tensors(column, features):
  """Calls `column.get_sparse_tensors` without a state manager.

  Args:
    column: Column object exposing `get_sparse_tensors`.
    features: Feature mapping wrapped in a `FeatureTransformationCache`.

  Returns:
    Whatever `column.get_sparse_tensors` returns.
  """
  transformation_cache = fc.FeatureTransformationCache(features)
  return column.get_sparse_tensors(transformation_cache, None)
143
144
@test_util.run_all_in_graph_and_eager_modes
class SequenceCategoricalColumnWithIdentityTest(
    test.TestCase, parameterized.TestCase):
  """Tests for `sfc.sequence_categorical_column_with_identity`."""

  @parameterized.named_parameters(
      dict(
          testcase_name='2D',
          inputs_args=dict(
              indices=((0, 0), (1, 0), (1, 1)),
              values=(1, 2, 0),
              dense_shape=(2, 2)),
          expected_args=dict(
              indices=((0, 0, 0), (1, 0, 0), (1, 1, 0)),
              values=np.array((1, 2, 0), dtype=np.int64),
              dense_shape=(2, 2, 1))),
      dict(
          testcase_name='3D',
          inputs_args=dict(
              indices=((0, 0, 2), (1, 0, 0), (1, 2, 0)),
              values=(6, 7, 8),
              dense_shape=(2, 2, 2)),
          expected_args=dict(
              indices=((0, 0, 2), (1, 0, 0), (1, 2, 0)),
              values=np.array((6, 7, 8), dtype=np.int64),
              dense_shape=(2, 2, 2))))
  def test_get_sparse_tensors(self, inputs_args, expected_args):
    """The id tensor matches the expected sparse value; there are no weights."""
    column = sfc.sequence_categorical_column_with_identity('aaa', num_buckets=9)
    features = {'aaa': sparse_tensor.SparseTensorValue(**inputs_args)}
    want = sparse_tensor.SparseTensorValue(**expected_args)

    sparse_tensors = _get_sparse_tensors(column, features)

    self.assertIsNone(sparse_tensors.weight_tensor)
    got = self.evaluate(sparse_tensors.id_tensor)
    _assert_sparse_tensor_value(self, want, got)

  def test_serialization(self):
    """An indicator column over the sequence column round-trips via config."""
    parent = sfc.sequence_categorical_column_with_identity(
        'animal', num_buckets=4)
    animal = fc.indicator_column(parent)

    config = animal.get_config()

    # Expected config, assembled from the innermost column outwards.
    identity_config = {
        'class_name': 'IdentityCategoricalColumn',
        'config': {
            'default_value': None,
            'key': 'animal',
            'number_buckets': 4,
        },
    }
    sequence_config = {
        'class_name': 'SequenceCategoricalColumn',
        'config': {'categorical_column': identity_config},
    }
    self.assertEqual({'categorical_column': sequence_config}, config)

    # Without `columns_by_name`, the restored parent is a different object.
    restored = fc.IndicatorColumn.from_config(config)
    self.assertEqual(animal, restored)
    self.assertIsNot(parent, restored.categorical_column)

    # With `columns_by_name`, the supplied parent instance is reused.
    known_columns = {
        serialization._column_name_with_class_name(parent): parent,
    }
    restored = fc.IndicatorColumn.from_config(
        config, columns_by_name=known_columns)
    self.assertEqual(animal, restored)
    self.assertIs(parent, restored.categorical_column)
215
216
@test_util.run_all_in_graph_and_eager_modes
class SequenceCategoricalColumnWithHashBucketTest(
    test.TestCase, parameterized.TestCase):
  """Tests for `sfc.sequence_categorical_column_with_hash_bucket`."""

  @parameterized.named_parameters(
      dict(
          testcase_name='2D',
          inputs_args=dict(
              indices=((0, 0), (1, 0), (1, 1)),
              values=('omar', 'stringer', 'marlo'),
              dense_shape=(2, 2)),
          expected_args=dict(
              indices=((0, 0, 0), (1, 0, 0), (1, 1, 0)),
              # Ignored to avoid hash dependence in test.
              values=np.array((0, 0, 0), dtype=np.int64),
              dense_shape=(2, 2, 1))),
      dict(
          testcase_name='3D',
          inputs_args=dict(
              indices=((0, 0, 2), (1, 0, 0), (1, 2, 0)),
              values=('omar', 'stringer', 'marlo'),
              dense_shape=(2, 2, 2)),
          expected_args=dict(
              indices=((0, 0, 2), (1, 0, 0), (1, 2, 0)),
              # Ignored to avoid hash dependence in test.
              values=np.array((0, 0, 0), dtype=np.int64),
              dense_shape=(2, 2, 2))))
  def test_get_sparse_tensors(self, inputs_args, expected_args):
    """Indices and dense shape match; hashed values are not compared."""
    column = sfc.sequence_categorical_column_with_hash_bucket(
        'aaa', hash_bucket_size=10)
    features = {'aaa': sparse_tensor.SparseTensorValue(**inputs_args)}
    want = sparse_tensor.SparseTensorValue(**expected_args)

    sparse_tensors = _get_sparse_tensors(column, features)

    self.assertIsNone(sparse_tensors.weight_tensor)
    got = self.evaluate(sparse_tensors.id_tensor)
    _assert_sparse_tensor_indices_shape(self, want, got)
254
255
@test_util.run_all_in_graph_and_eager_modes
class SequenceCategoricalColumnWithVocabularyFileTest(
    test.TestCase, parameterized.TestCase):
  """Tests for `sfc.sequence_categorical_column_with_vocabulary_file`."""

  def _write_vocab(self, vocab_strings, file_name):
    """Writes `vocab_strings`, newline-separated, to a file in the temp dir.

    Args:
      vocab_strings: Iterable of vocabulary entries.
      file_name: Base name of the file to create.

    Returns:
      The full path of the written file.
    """
    path = os.path.join(self.get_temp_dir(), file_name)
    with open(path, 'w') as vocab_out:
      contents = '\n'.join(vocab_strings)
      vocab_out.write(contents)
    return path

  def setUp(self):
    """Creates the three-entry vocabulary file shared by the tests."""
    super().setUp()

    self._wire_vocabulary_file_name = self._write_vocab(
        ['omar', 'stringer', 'marlo'], 'wire_vocabulary.txt')
    self._wire_vocabulary_size = 3

  @parameterized.named_parameters(
      dict(
          testcase_name='2D',
          inputs_args=dict(
              indices=((0, 0), (1, 0), (1, 1)),
              values=('marlo', 'skywalker', 'omar'),
              dense_shape=(2, 2)),
          expected_args=dict(
              indices=((0, 0, 0), (1, 0, 0), (1, 1, 0)),
              values=np.array((2, -1, 0), dtype=np.int64),
              dense_shape=(2, 2, 1))),
      dict(
          testcase_name='3D',
          inputs_args=dict(
              indices=((0, 0, 2), (1, 0, 0), (1, 2, 0)),
              values=('omar', 'skywalker', 'marlo'),
              dense_shape=(2, 2, 2)),
          expected_args=dict(
              indices=((0, 0, 2), (1, 0, 0), (1, 2, 0)),
              values=np.array((0, -1, 2), dtype=np.int64),
              dense_shape=(2, 2, 2))))
  def test_get_sparse_tensors(self, inputs_args, expected_args):
    """Strings map to their vocabulary position; 'skywalker' maps to -1."""
    column = sfc.sequence_categorical_column_with_vocabulary_file(
        key='aaa',
        vocabulary_file=self._wire_vocabulary_file_name,
        vocabulary_size=self._wire_vocabulary_size)
    features = {'aaa': sparse_tensor.SparseTensorValue(**inputs_args)}
    want = sparse_tensor.SparseTensorValue(**expected_args)

    sparse_tensors = _get_sparse_tensors(column, features)

    self.assertIsNone(sparse_tensors.weight_tensor)
    # Initializers are evaluated before the id tensor is read.
    self.evaluate(variables_lib.global_variables_initializer())
    self.evaluate(lookup_ops.tables_initializer())
    got = self.evaluate(sparse_tensors.id_tensor)
    _assert_sparse_tensor_value(self, want, got)

  def test_get_sparse_tensors_dynamic_zero_length(self):
    """Tests _get_sparse_tensors with a dynamic sequence length."""
    with ops.Graph().as_default():
      column = sfc.sequence_categorical_column_with_vocabulary_file(
          key='aaa',
          vocabulary_file=self._wire_vocabulary_file_name,
          vocabulary_size=self._wire_vocabulary_size)
      empty_input = sparse_tensor.SparseTensorValue(
          indices=np.zeros((0, 2)), values=[], dense_shape=(2, 0))
      want = sparse_tensor.SparseTensorValue(
          indices=np.zeros((0, 3)),
          values=np.array((), dtype=np.int64),
          dense_shape=(2, 0, 1))

      # Keep the batch dimension static and make the second dimension
      # (sequence length) dynamic.
      batch_size, _ = empty_input.dense_shape
      string_placeholder = array_ops.sparse_placeholder(
          dtypes.string, shape=[batch_size, None])
      sparse_tensors = _get_sparse_tensors(column, {'aaa': string_placeholder})

      self.assertIsNone(sparse_tensors.weight_tensor)
      with _initialized_session() as sess:
        got = sparse_tensors.id_tensor.eval(
            session=sess, feed_dict={string_placeholder: empty_input})
        _assert_sparse_tensor_value(self, want, got)
336
337
@test_util.run_all_in_graph_and_eager_modes
class SequenceCategoricalColumnWithVocabularyListTest(
    test.TestCase, parameterized.TestCase):
  """Tests for `sfc.sequence_categorical_column_with_vocabulary_list`."""

  @parameterized.named_parameters(
      dict(
          testcase_name='2D',
          inputs_args=dict(
              indices=((0, 0), (1, 0), (1, 1)),
              values=('marlo', 'skywalker', 'omar'),
              dense_shape=(2, 2)),
          expected_args=dict(
              indices=((0, 0, 0), (1, 0, 0), (1, 1, 0)),
              values=np.array((2, -1, 0), dtype=np.int64),
              dense_shape=(2, 2, 1))),
      dict(
          testcase_name='3D',
          inputs_args=dict(
              indices=((0, 0, 2), (1, 0, 0), (1, 2, 0)),
              values=('omar', 'skywalker', 'marlo'),
              dense_shape=(2, 2, 2)),
          expected_args=dict(
              indices=((0, 0, 2), (1, 0, 0), (1, 2, 0)),
              values=np.array((0, -1, 2), dtype=np.int64),
              dense_shape=(2, 2, 2))))
  def test_get_sparse_tensors(self, inputs_args, expected_args):
    """Strings map to their vocabulary position; 'skywalker' maps to -1."""
    column = sfc.sequence_categorical_column_with_vocabulary_list(
        key='aaa',
        vocabulary_list=('omar', 'stringer', 'marlo'))
    features = {'aaa': sparse_tensor.SparseTensorValue(**inputs_args)}
    want = sparse_tensor.SparseTensorValue(**expected_args)

    sparse_tensors = _get_sparse_tensors(column, features)

    self.assertIsNone(sparse_tensors.weight_tensor)
    # Initializers are evaluated before the id tensor is read.
    self.evaluate(variables_lib.global_variables_initializer())
    self.evaluate(lookup_ops.tables_initializer())
    got = self.evaluate(sparse_tensors.id_tensor)
    _assert_sparse_tensor_value(self, want, got)
376
377
@test_util.run_all_in_graph_and_eager_modes
class SequenceEmbeddingColumnTest(
    test.TestCase, parameterized.TestCase):
  """Tests `fc.embedding_column` over a sequence identity column."""

  @parameterized.named_parameters(
      dict(
          testcase_name='2D',
          inputs_args=dict(
              # example 0, ids [2]
              # example 1, ids [0, 1]
              # example 2, ids []
              # example 3, ids [1]
              indices=((0, 0), (1, 0), (1, 1), (3, 0)),
              values=(2, 0, 1, 1),
              dense_shape=(4, 2)),
          expected=[
              # example 0, ids [2]
              [[7., 11.], [0., 0.]],
              # example 1, ids [0, 1]
              [[1., 2.], [3., 5.]],
              # example 2, ids []
              [[0., 0.], [0., 0.]],
              # example 3, ids [1]
              [[3., 5.], [0., 0.]],
          ]),
      dict(
          testcase_name='3D',
          inputs_args=dict(
              # example 0, ids [[2]]
              # example 1, ids [[0, 1], [2]]
              # example 2, ids []
              # example 3, ids [[1], [0, 2]]
              indices=((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0),
                       (3, 0, 0), (3, 1, 0), (3, 1, 1)),
              values=(2, 0, 1, 2, 1, 0, 2),
              dense_shape=(4, 2, 2)),
          expected=[
              # example 0, ids [[2]]
              [[7., 11.], [0., 0.]],
              # example 1, ids [[0, 1], [2]]
              [[2, 3.5], [7., 11.]],
              # example 2, ids []
              [[0., 0.], [0., 0.]],
              # example 3, ids [[1], [0, 2]]
              [[3., 5.], [4., 6.5]],
          ]))
  def test_get_sequence_dense_tensor(self, inputs_args, expected):
    """Lookups return the fixed embedding rows; empty steps are zeros."""
    vocabulary_size = 3
    embedding_dimension = 2
    embedding_values = (
        (1., 2.),  # id 0
        (3., 5.),  # id 1
        (7., 11.)  # id 2
    )

    def _initializer(shape, dtype, partition_info=None):
      # Verifies the arguments it is called with and returns the fixed table.
      self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
      self.assertEqual(dtypes.float32, dtype)
      self.assertIsNone(partition_info)
      return embedding_values

    id_column = sfc.sequence_categorical_column_with_identity(
        key='aaa', num_buckets=vocabulary_size)
    embedding_column = fc.embedding_column(
        id_column,
        dimension=embedding_dimension,
        initializer=_initializer)
    features = {'aaa': sparse_tensor.SparseTensorValue(**inputs_args)}

    embedding_lookup, _, state_manager = _get_sequence_dense_tensor_state(
        embedding_column, features)

    layer_weights = state_manager._layer.weights
    self.evaluate(variables_lib.global_variables_initializer())
    weight_names = tuple(weight.name for weight in layer_weights)
    self.assertCountEqual(('embedding_weights:0',), weight_names)
    self.assertAllEqual(embedding_values, self.evaluate(layer_weights[0]))
    self.assertAllEqual(expected, self.evaluate(embedding_lookup))

  @parameterized.named_parameters(
      dict(
          testcase_name='2D',
          inputs_args=dict(
              # example 0, ids [2]
              # example 1, ids [0, 1]
              indices=((0, 0), (1, 0), (1, 1)),
              values=(2, 0, 1),
              dense_shape=(2, 2)),
          expected_sequence_length=[1, 2]),
      dict(
          testcase_name='3D',
          inputs_args=dict(
              # example 0, ids [[2]]
              # example 1, ids [[0, 1], [2]]
              indices=((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0)),
              values=(2, 0, 1, 2),
              dense_shape=(2, 2, 2)),
          expected_sequence_length=[1, 2]))
  def test_sequence_length(self, inputs_args, expected_sequence_length):
    """Sequence lengths match the expectation and are int64."""
    vocabulary_size = 3
    id_column = sfc.sequence_categorical_column_with_identity(
        key='aaa', num_buckets=vocabulary_size)
    embedding_column = fc.embedding_column(id_column, dimension=2)
    features = {'aaa': sparse_tensor.SparseTensorValue(**inputs_args)}

    _, length_tensor, _ = _get_sequence_dense_tensor_state(
        embedding_column, features)

    lengths = self.evaluate(length_tensor)
    self.assertAllEqual(expected_sequence_length, lengths)
    self.assertEqual(np.int64, lengths.dtype)

  def test_sequence_length_with_empty_rows(self):
    """Tests _sequence_length when some examples do not have ids."""
    vocabulary_size = 3
    id_column = sfc.sequence_categorical_column_with_identity(
        key='aaa', num_buckets=vocabulary_size)
    embedding_column = fc.embedding_column(id_column, dimension=2)

    sparse_input = sparse_tensor.SparseTensorValue(
        # example 0, ids []
        # example 1, ids [2]
        # example 2, ids [0, 1]
        # example 3, ids []
        # example 4, ids [1]
        # example 5, ids []
        indices=((1, 0), (2, 0), (2, 1), (4, 0)),
        values=(2, 0, 1, 1),
        dense_shape=(6, 2))
    expected_sequence_length = [0, 1, 2, 0, 1, 0]

    _, length_tensor, _ = _get_sequence_dense_tensor_state(
        embedding_column, {'aaa': sparse_input})

    self.assertAllEqual(
        expected_sequence_length, self.evaluate(length_tensor))
512
513
class SequenceSharedEmbeddingColumnTest(test.TestCase):
  """Tests `fc.shared_embedding_columns_v2` over sequence identity columns.

  Every test builds its ops inside a fresh `ops.Graph()`.
  """

  def test_get_sequence_dense_tensor(self):
    """Both columns look up rows of one shared embedding variable."""
    vocabulary_size = 3
    embedding_dimension = 2
    embedding_values = (
        (1., 2.),  # id 0
        (3., 5.),  # id 1
        (7., 11.)  # id 2
    )

    def _initializer(shape, dtype, partition_info=None):
      # Verifies the arguments it is called with and returns the fixed table.
      self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
      self.assertEqual(dtypes.float32, dtype)
      self.assertIsNone(partition_info)
      return embedding_values

    with ops.Graph().as_default():
      sparse_input_a = sparse_tensor.SparseTensorValue(
          # example 0, ids [2]
          # example 1, ids [0, 1]
          # example 2, ids []
          # example 3, ids [1]
          indices=((0, 0), (1, 0), (1, 1), (3, 0)),
          values=(2, 0, 1, 1),
          dense_shape=(4, 2))
      sparse_input_b = sparse_tensor.SparseTensorValue(
          # example 0, ids [1]
          # example 1, ids [0, 2]
          # example 2, ids [0]
          # example 3, ids []
          indices=((0, 0), (1, 0), (1, 1), (2, 0)),
          values=(1, 0, 2, 0),
          dense_shape=(4, 2))

      expected_lookups_a = [
          # example 0, ids [2]
          [[7., 11.], [0., 0.]],
          # example 1, ids [0, 1]
          [[1., 2.], [3., 5.]],
          # example 2, ids []
          [[0., 0.], [0., 0.]],
          # example 3, ids [1]
          [[3., 5.], [0., 0.]],
      ]

      expected_lookups_b = [
          # example 0, ids [1]
          [[3., 5.], [0., 0.]],
          # example 1, ids [0, 2]
          [[1., 2.], [7., 11.]],
          # example 2, ids [0]
          [[1., 2.], [0., 0.]],
          # example 3, ids []
          [[0., 0.], [0., 0.]],
      ]

      categorical_column_a = sfc.sequence_categorical_column_with_identity(
          key='aaa', num_buckets=vocabulary_size)
      categorical_column_b = sfc.sequence_categorical_column_with_identity(
          key='bbb', num_buckets=vocabulary_size)
      shared_embedding_columns = fc.shared_embedding_columns_v2(
          [categorical_column_a, categorical_column_b],
          dimension=embedding_dimension,
          initializer=_initializer)

      embedding_lookup_a = _get_sequence_dense_tensor(
          shared_embedding_columns[0], {'aaa': sparse_input_a})[0]
      embedding_lookup_b = _get_sequence_dense_tensor(
          shared_embedding_columns[1], {'bbb': sparse_input_b})[0]

      self.evaluate(variables_lib.global_variables_initializer())
      global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
      # `assertCountEqual` is the Python 3 name for the legacy
      # `assertItemsEqual`; the graph must hold exactly one shared variable.
      self.assertCountEqual(('aaa_bbb_shared_embedding:0',),
                            tuple(v.name for v in global_vars))
      self.assertAllEqual(embedding_values, self.evaluate(global_vars[0]))
      self.assertAllEqual(
          expected_lookups_a, self.evaluate(embedding_lookup_a))
      self.assertAllEqual(expected_lookups_b, self.evaluate(embedding_lookup_b))

  def test_sequence_length(self):
    """Each column reports its own int64 sequence lengths."""
    with ops.Graph().as_default():
      vocabulary_size = 3

      sparse_input_a = sparse_tensor.SparseTensorValue(
          # example 0, ids [2]
          # example 1, ids [0, 1]
          indices=((0, 0), (1, 0), (1, 1)),
          values=(2, 0, 1),
          dense_shape=(2, 2))
      expected_sequence_length_a = [1, 2]
      categorical_column_a = sfc.sequence_categorical_column_with_identity(
          key='aaa', num_buckets=vocabulary_size)

      sparse_input_b = sparse_tensor.SparseTensorValue(
          # example 0, ids [0, 2]
          # example 1, ids [1]
          indices=((0, 0), (0, 1), (1, 0)),
          values=(0, 2, 1),
          dense_shape=(2, 2))
      expected_sequence_length_b = [2, 1]
      categorical_column_b = sfc.sequence_categorical_column_with_identity(
          key='bbb', num_buckets=vocabulary_size)
      shared_embedding_columns = fc.shared_embedding_columns_v2(
          [categorical_column_a, categorical_column_b], dimension=2)

      sequence_length_a = _get_sequence_dense_tensor(
          shared_embedding_columns[0], {'aaa': sparse_input_a})[1]
      sequence_length_b = _get_sequence_dense_tensor(
          shared_embedding_columns[1], {'bbb': sparse_input_b})[1]

      with _initialized_session() as sess:
        sequence_length_a = sess.run(sequence_length_a)
        self.assertAllEqual(expected_sequence_length_a, sequence_length_a)
        self.assertEqual(np.int64, sequence_length_a.dtype)
        sequence_length_b = sess.run(sequence_length_b)
        self.assertAllEqual(expected_sequence_length_b, sequence_length_b)
        self.assertEqual(np.int64, sequence_length_b.dtype)

  def test_sequence_length_with_empty_rows(self):
    """Tests _sequence_length when some examples do not have ids."""
    with ops.Graph().as_default():
      vocabulary_size = 3
      sparse_input_a = sparse_tensor.SparseTensorValue(
          # example 0, ids []
          # example 1, ids [2]
          # example 2, ids [0, 1]
          # example 3, ids []
          # example 4, ids [1]
          # example 5, ids []
          indices=((1, 0), (2, 0), (2, 1), (4, 0)),
          values=(2, 0, 1, 1),
          dense_shape=(6, 2))
      expected_sequence_length_a = [0, 1, 2, 0, 1, 0]
      categorical_column_a = sfc.sequence_categorical_column_with_identity(
          key='aaa', num_buckets=vocabulary_size)

      sparse_input_b = sparse_tensor.SparseTensorValue(
          # example 0, ids [2]
          # example 1, ids []
          # example 2, ids []
          # example 3, ids []
          # example 4, ids [1]
          # example 5, ids [0, 1]
          indices=((0, 0), (4, 0), (5, 0), (5, 1)),
          values=(2, 1, 0, 1),
          dense_shape=(6, 2))
      expected_sequence_length_b = [1, 0, 0, 0, 1, 2]
      categorical_column_b = sfc.sequence_categorical_column_with_identity(
          key='bbb', num_buckets=vocabulary_size)

      shared_embedding_columns = fc.shared_embedding_columns_v2(
          [categorical_column_a, categorical_column_b], dimension=2)

      sequence_length_a = _get_sequence_dense_tensor(
          shared_embedding_columns[0], {'aaa': sparse_input_a})[1]
      sequence_length_b = _get_sequence_dense_tensor(
          shared_embedding_columns[1], {'bbb': sparse_input_b})[1]

      with _initialized_session() as sess:
        self.assertAllEqual(
            expected_sequence_length_a, sequence_length_a.eval(session=sess))
        self.assertAllEqual(
            expected_sequence_length_b, sequence_length_b.eval(session=sess))
678
679
@test_util.run_all_in_graph_and_eager_modes
class SequenceIndicatorColumnTest(test.TestCase, parameterized.TestCase):
  """Tests `fc.indicator_column` over a sequence identity column."""

  @parameterized.named_parameters(
      dict(
          testcase_name='2D',
          inputs_args=dict(
              # example 0, ids [2]
              # example 1, ids [0, 1]
              # example 2, ids []
              # example 3, ids [1]
              indices=((0, 0), (1, 0), (1, 1), (3, 0)),
              values=(2, 0, 1, 1),
              dense_shape=(4, 2)),
          expected=[
              # example 0, ids [2]
              [[0., 0., 1.], [0., 0., 0.]],
              # example 1, ids [0, 1]
              [[1., 0., 0.], [0., 1., 0.]],
              # example 2, ids []
              [[0., 0., 0.], [0., 0., 0.]],
              # example 3, ids [1]
              [[0., 1., 0.], [0., 0., 0.]],
          ]),
      dict(
          testcase_name='3D',
          inputs_args=dict(
              # example 0, ids [[2]]
              # example 1, ids [[0, 1], [2]]
              # example 2, ids []
              # example 3, ids [[1], [2, 2]]
              indices=((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0),
                       (3, 0, 0), (3, 1, 0), (3, 1, 1)),
              values=(2, 0, 1, 2, 1, 2, 2),
              dense_shape=(4, 2, 2)),
          expected=[
              # example 0, ids [[2]]
              [[0., 0., 1.], [0., 0., 0.]],
              # example 1, ids [[0, 1], [2]]
              [[1., 1., 0.], [0., 0., 1.]],
              # example 2, ids []
              [[0., 0., 0.], [0., 0., 0.]],
              # example 3, ids [[1], [2, 2]]
              [[0., 1., 0.], [0., 0., 2.]],
          ]))
  def test_get_sequence_dense_tensor(self, inputs_args, expected):
    """The dense tensor holds per-step id counts over three buckets."""
    vocabulary_size = 3
    id_column = sfc.sequence_categorical_column_with_identity(
        key='aaa', num_buckets=vocabulary_size)
    indicator_column = fc.indicator_column(id_column)
    features = {'aaa': sparse_tensor.SparseTensorValue(**inputs_args)}

    indicator_tensor, _ = _get_sequence_dense_tensor(
        indicator_column, features)

    self.assertAllEqual(expected, self.evaluate(indicator_tensor))

  @parameterized.named_parameters(
      dict(
          testcase_name='2D',
          inputs_args=dict(
              # example 0, ids [2]
              # example 1, ids [0, 1]
              indices=((0, 0), (1, 0), (1, 1)),
              values=(2, 0, 1),
              dense_shape=(2, 2)),
          expected_sequence_length=[1, 2]),
      dict(
          testcase_name='3D',
          inputs_args=dict(
              # example 0, ids [[2]]
              # example 1, ids [[0, 1], [2]]
              indices=((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0)),
              values=(2, 0, 1, 2),
              dense_shape=(2, 2, 2)),
          expected_sequence_length=[1, 2]))
  def test_sequence_length(self, inputs_args, expected_sequence_length):
    """Sequence lengths match the expectation and are int64."""
    vocabulary_size = 3
    id_column = sfc.sequence_categorical_column_with_identity(
        key='aaa', num_buckets=vocabulary_size)
    indicator_column = fc.indicator_column(id_column)
    features = {'aaa': sparse_tensor.SparseTensorValue(**inputs_args)}

    _, length_tensor = _get_sequence_dense_tensor(indicator_column, features)

    lengths = self.evaluate(length_tensor)
    self.assertAllEqual(expected_sequence_length, lengths)
    self.assertEqual(np.int64, lengths.dtype)

  def test_sequence_length_with_empty_rows(self):
    """Tests _sequence_length when some examples do not have ids."""
    vocabulary_size = 3
    id_column = sfc.sequence_categorical_column_with_identity(
        key='aaa', num_buckets=vocabulary_size)
    indicator_column = fc.indicator_column(id_column)

    sparse_input = sparse_tensor.SparseTensorValue(
        # example 0, ids []
        # example 1, ids [2]
        # example 2, ids [0, 1]
        # example 3, ids []
        # example 4, ids [1]
        # example 5, ids []
        indices=((1, 0), (2, 0), (2, 1), (4, 0)),
        values=(2, 0, 1, 1),
        dense_shape=(6, 2))
    expected_sequence_length = [0, 1, 2, 0, 1, 0]

    _, length_tensor = _get_sequence_dense_tensor(
        indicator_column, {'aaa': sparse_input})

    self.assertAllEqual(
        expected_sequence_length, self.evaluate(length_tensor))
792
793
@test_util.run_all_in_graph_and_eager_modes
class SequenceNumericColumnTest(test.TestCase, parameterized.TestCase):
  """Tests for columns created by `sfc.sequence_numeric_column`."""

  def test_defaults(self):
    """A column built from only a key exposes the default attributes."""
    column = sfc.sequence_numeric_column('aaa')
    self.assertEqual('aaa', column.key)
    self.assertEqual('aaa', column.name)
    self.assertEqual((1,), column.shape)
    self.assertEqual(0., column.default_value)
    self.assertEqual(dtypes.float32, column.dtype)
    self.assertIsNone(column.normalizer_fn)

  def test_shape_saved_as_tuple(self):
    """A list passed as `shape` is exposed as a tuple."""
    column = sfc.sequence_numeric_column('aaa', shape=[1, 2])
    self.assertEqual((1, 2), column.shape)

  def test_shape_must_be_positive_integer(self):
    """Float and zero shape dimensions are both rejected."""
    # A float dimension raises a TypeError.
    rejects_non_integer = self.assertRaisesRegex(
        TypeError, 'shape dimensions must be integer')
    with rejects_non_integer:
      sfc.sequence_numeric_column('aaa', shape=[1.0])

    # A zero dimension raises a ValueError.
    rejects_non_positive = self.assertRaisesRegex(
        ValueError, 'shape dimensions must be greater than 0')
    with rejects_non_positive:
      sfc.sequence_numeric_column('aaa', shape=[0])

  def test_dtype_is_convertible_to_float(self):
    """A string dtype is rejected."""
    rejects_string_dtype = self.assertRaisesRegex(
        ValueError, 'dtype must be convertible to float')
    with rejects_string_dtype:
      sfc.sequence_numeric_column('aaa', dtype=dtypes.string)

  def test_normalizer_fn_must_be_callable(self):
    """A non-callable `normalizer_fn` is rejected."""
    rejects_non_callable = self.assertRaisesRegex(
        TypeError, 'must be a callable')
    with rejects_non_callable:
      sfc.sequence_numeric_column('aaa', normalizer_fn='NotACallable')

  @parameterized.named_parameters(
      {
          'testcase_name': '2D',
          'inputs_args': {
              # example 0, values [0., 1.]
              # example 1, values [10.]
              'indices': ((0, 0), (0, 1), (1, 0)),
              'values': (0., 1., 10.),
              'dense_shape': (2, 2),
          },
          'expected': [
              [[0.], [1.]],
              [[10.], [0.]],
          ],
      },
      {
          'testcase_name': '3D',
          'inputs_args': {
              # example 0, values [[20, 3], [5.]]
              # example 1, values [[3.], [8.]]
              'indices': (
                  (0, 0, 0), (0, 0, 1), (0, 1, 0), (1, 0, 0), (1, 1, 0)),
              'values': (20, 3, 5., 3., 8.),
              'dense_shape': (2, 2, 2),
          },
          'expected': [
              [[20.], [3.], [5.], [0.]],
              [[3.], [0.], [8.], [0.]],
          ],
      },
  )
  def test_get_sequence_dense_tensor(self, inputs_args, expected):
    """Checks the dense tensor produced from sparse numeric inputs.

    Args:
      inputs_args: Keyword arguments (`indices`, `values`, `dense_shape`) used
        to build the input `SparseTensorValue`.
      expected: Nested list with the expected dense tensor values.
    """
    features = {'aaa': sparse_tensor.SparseTensorValue(**inputs_args)}
    column = sfc.sequence_numeric_column('aaa')

    # Only the dense tensor is checked here; the sequence length is ignored.
    dense_tensor, _ = _get_sequence_dense_tensor(column, features)

    actual = self.evaluate(dense_tensor)
    self.assertAllEqual(expected, actual)

  def test_get_sequence_dense_tensor_with_normalizer_fn(self):
    """Checks that `normalizer_fn` is applied to the sparse input."""

    def _increment_two(input_sparse_tensor):
      # Adds 2 at positions (0, 0) and (1, 1) only.
      increment = sparse_tensor.SparseTensor(
          indices=((0, 0), (1, 1)), values=(2.0, 2.0), dense_shape=(2, 2))
      return sparse_ops.sparse_add(input_sparse_tensor, increment)

    # example 0, values [[0.], [1.]]
    # example 1, values [[10.]]
    features = {
        'aaa': sparse_tensor.SparseTensorValue(
            indices=((0, 0), (0, 1), (1, 0)),
            values=(0., 1., 10.),
            dense_shape=(2, 2)),
    }

    # Dense view before _increment_two:
    #   [[0.], [1.]],
    #   [[10.], [0.]],
    # Dense view after _increment_two:
    #   [[2.], [1.]],
    #   [[10.], [2.]],
    expected_dense_tensor = [
        [[2.], [1.]],
        [[10.], [2.]],
    ]
    column = sfc.sequence_numeric_column('aaa', normalizer_fn=_increment_two)

    dense_tensor, _ = _get_sequence_dense_tensor(column, features)

    actual = self.evaluate(dense_tensor)
    self.assertAllEqual(expected_dense_tensor, actual)

  @parameterized.named_parameters(
      {
          'testcase_name': '2D',
          'sparse_input_args': {
              # example 0, values [[[0., 1.],  [2., 3.]], [[4., 5.],  [6., 7.]]]
              # example 1, values [[[10., 11.],  [12., 13.]]]
              'indices': (
                  (0, 0), (0, 1), (0, 2), (0, 3),
                  (0, 4), (0, 5), (0, 6), (0, 7),
                  (1, 0), (1, 1), (1, 2), (1, 3)),
              'values': (0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.),
              'dense_shape': (2, 8),
          },
          'expected_dense_tensor': [
              [[[0., 1.], [2., 3.]], [[4., 5.], [6., 7.]]],
              [[[10., 11.], [12., 13.]], [[0., 0.], [0., 0.]]],
          ],
      },
      {
          'testcase_name': '3D',
          'sparse_input_args': {
              # Only even positions of the innermost dimension are set, so
              # the expected tensor has zeros in between the given values.
              'indices': (
                  (0, 0, 0), (0, 0, 2), (0, 0, 4), (0, 0, 6),
                  (0, 1, 0), (0, 1, 2), (0, 1, 4), (0, 1, 6),
                  (1, 0, 0), (1, 0, 2), (1, 0, 4), (1, 0, 6)),
              'values': (0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.),
              'dense_shape': (2, 2, 8),
          },
          'expected_dense_tensor': [
              [[[0., 0.], [1., 0.]], [[2., 0.], [3., 0.]],
               [[4., 0.], [5., 0.]], [[6., 0.], [7., 0.]]],
              [[[10., 0.], [11., 0.]], [[12., 0.], [13., 0.]],
               [[0., 0.], [0., 0.]], [[0., 0.], [0., 0.]]],
          ],
      },
  )
  def test_get_dense_tensor_multi_dim(
      self, sparse_input_args, expected_dense_tensor):
    """Tests get_sequence_dense_tensor for multi-dim numeric_column."""
    features = {'aaa': sparse_tensor.SparseTensorValue(**sparse_input_args)}
    column = sfc.sequence_numeric_column('aaa', shape=(2, 2))

    dense_tensor, _ = _get_sequence_dense_tensor(column, features)

    actual = self.evaluate(dense_tensor)
    self.assertAllEqual(expected_dense_tensor, actual)

  @parameterized.named_parameters(
      {
          'testcase_name': '2D',
          'inputs_args': {
              # example 0, values [2.]
              # example 1, values [0., 1.]
              'indices': ((0, 0), (1, 0), (1, 1)),
              'values': (2., 0., 1.),
              'dense_shape': (2, 2),
          },
          'expected_sequence_length': [1, 2],
          'shape': (1,),
      },
      {
          'testcase_name': '3D',
          'inputs_args': {
              # example 0, values [[2.]]
              # example 1, values [[0., 1.], [2.]]
              'indices': ((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0)),
              'values': (2., 0., 1., 2.),
              'dense_shape': (2, 2, 2),
          },
          'expected_sequence_length': [1, 2],
          'shape': (1,),
      },
      {
          'testcase_name': '2D_with_shape',
          'inputs_args': {
              # example 0, values [2.]
              # example 1, values [0., 1.]
              'indices': ((0, 0), (1, 0), (1, 1)),
              'values': (2., 0., 1.),
              'dense_shape': (2, 2),
          },
          'expected_sequence_length': [1, 1],
          'shape': (2,),
      },
      {
          'testcase_name': '3D_with_shape',
          'inputs_args': {
              # example 0, values [[2.]]
              # example 1, values [[0., 1.], [2.]]
              'indices': ((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0)),
              'values': (2., 0., 1., 2.),
              'dense_shape': (2, 2, 2),
          },
          'expected_sequence_length': [1, 2],
          'shape': (2,),
      },
  )
  def test_sequence_length(self, inputs_args, expected_sequence_length, shape):
    """Checks the sequence lengths reported for a numeric column.

    Args:
      inputs_args: Keyword arguments (`indices`, `values`, `dense_shape`) used
        to build the input `SparseTensorValue`.
      expected_sequence_length: Expected length of each example's sequence.
      shape: Value passed as `shape` when creating the column.
    """
    features = {'aaa': sparse_tensor.SparseTensorValue(**inputs_args)}
    column = sfc.sequence_numeric_column('aaa', shape=shape)

    # Only the sequence length is checked here; the dense tensor is ignored.
    _, length_tensor = _get_sequence_dense_tensor(column, features)

    actual_length = self.evaluate(length_tensor)
    self.assertAllEqual(expected_sequence_length, actual_length)
    # The evaluated lengths must come back as 64-bit integers.
    self.assertEqual(np.int64, actual_length.dtype)

  def test_sequence_length_with_empty_rows(self):
    """Tests _sequence_length when some examples do not have values."""
    # Per-example values encoded by the sparse input below:
    #   example 0: []
    #   example 1: [[0.], [1.]]
    #   example 2: [[2.]]
    #   example 3: []
    #   example 4: [[3.]]
    #   example 5: []
    features = {
        'aaa': sparse_tensor.SparseTensorValue(
            indices=((1, 0), (1, 1), (2, 0), (4, 0)),
            values=(0., 1., 2., 3.),
            dense_shape=(6, 2)),
    }
    # Examples without values are expected to report a length of zero.
    expected_sequence_length = [0, 2, 1, 0, 1, 0]
    column = sfc.sequence_numeric_column('aaa')

    _, length_tensor = _get_sequence_dense_tensor(column, features)

    actual_length = self.evaluate(length_tensor)
    self.assertAllEqual(expected_sequence_length, actual_length)

  def test_serialization(self):
    """Tests that column can be serialized."""

    def _custom_fn(input_tensor):
      return input_tensor + 42

    original = sfc.sequence_numeric_column(
        key='my-key',
        shape=(2,),
        default_value=3,
        dtype=dtypes.int32,
        normalizer_fn=_custom_fn)

    # Round trip: serialize, then rebuild the column from the result. The
    # custom normalizer is supplied by name through `custom_objects`.
    config = serialization.serialize_feature_column(original)
    restored = serialization.deserialize_feature_column(
        config, custom_objects={_custom_fn.__name__: _custom_fn})

    self.assertEqual(restored.key, 'my-key')
    self.assertEqual(restored.shape, (2,))
    self.assertEqual(restored.default_value, 3)
    # 3 + 42 == 45 shows the restored normalizer behaves like _custom_fn.
    self.assertEqual(restored.normalizer_fn(3), 45)

    # `int()` is 0, which is not a feature column and must be rejected.
    rejects_non_column = self.assertRaisesRegex(
        ValueError, 'Instance: 0 is not a FeatureColumn')
    with rejects_non_column:
      serialization.serialize_feature_column(int())

  def test_parents(self):
    """Tests parents attribute of column."""
    column = sfc.sequence_numeric_column(key='my-key')
    # The only parent reported is the raw feature key.
    self.assertEqual(column.parents, ['my-key'])
1021
1022
if __name__ == '__main__':
  # When this file is executed as a script, hand control to the test runner
  # from `tensorflow.python.platform.test`.
  test.main()
1025