diff --git a/tensorflow/python/ops/embedding_variable_ops_test.py b/tensorflow/python/ops/embedding_variable_ops_test.py
index 25a0cb6ff11..c6cdf951a1e 100644
--- a/tensorflow/python/ops/embedding_variable_ops_test.py
+++ b/tensorflow/python/ops/embedding_variable_ops_test.py
@@ -2816,5 +2816,80 @@ def testSetInitializedWithRestore(self):
       result = sess.run(var._is_initialized_op)
       self.assertEqual(True, result)
 
+  def testCountsTensor(self):
+    os.environ["TF_RECORD_FREQ"] = "1"
+    checkpoint_directory = self.get_temp_dir()
+    ckpt_path = os.path.join(checkpoint_directory, "model.ckpt")
+    with ops.Graph().as_default() as g, ops.device('/cpu:0'):
+      var = variable_scope.get_embedding_variable("var_1",
+                                                  embedding_dim=3)
+      sp1 = sparse_tensor.SparseTensor(
+          indices=[[0,0],[1,0],[2,0],[3,0],[4,0],[5,0]],
+          values=math_ops.cast([0,0,0,1,1,2], dtypes.int64),
+          dense_shape=[6, 1])
+      sp2 = sparse_tensor.SparseTensor(
+          indices=[[0,0],[1,0],[2,0],[3,0],[4,0],[5,0]],
+          values=math_ops.cast([3,3,3,4,4,1], dtypes.int64),
+          dense_shape=[6, 1])
+      emb1 = embedding_ops.embedding_lookup_sparse(var, sp1, None)
+      emb2 = embedding_ops.embedding_lookup_sparse(var, sp2, None)
+      emb = emb1 + emb2
+      fun = math_ops.multiply(emb, 2.0, name='multiply')
+      loss = math_ops.reduce_sum(fun, name='reduce_sum')
+      gs = training_util.get_or_create_global_step()
+      opt = adagrad_decay.AdagradDecayOptimizer(0.1, gs)
+      g_v = opt.compute_gradients(loss)
+      train_op = opt.apply_gradients(g_v)
+      saver = saver_module.Saver()
+      init = variables.global_variables_initializer()
+      with self.test_session(graph=g) as sess:
+        sess.run([init])
+        sess.run(train_op)
+        saver.save(sess, ckpt_path)
+
+    for name, shape in checkpoint_utils.list_variables(ckpt_path):
+      if name == "var_1-freqs":
+        value = checkpoint_utils.load_variable(ckpt_path, name)
+        self.assertAllEqual(value, [3, 3, 1, 3, 2])
+    del os.environ["TF_RECORD_FREQ"]
+
+  def testCountsTensorWithGradientDescent(self):
+    os.environ["TF_RECORD_FREQ"] = "1"
+    checkpoint_directory = self.get_temp_dir()
+    ckpt_path = os.path.join(checkpoint_directory, "model.ckpt")
+    with ops.Graph().as_default() as g, ops.device('/cpu:0'):
+      var = variable_scope.get_embedding_variable("var_1",
+                                                  embedding_dim=3)
+      sp1 = sparse_tensor.SparseTensor(
+          indices=[[0,0],[1,0],[2,0],[3,0],[4,0],[5,0]],
+          values=math_ops.cast([0,0,0,1,1,2], dtypes.int64),
+          dense_shape=[6, 1])
+      sp2 = sparse_tensor.SparseTensor(
+          indices=[[0,0],[1,0],[2,0],[3,0],[4,0],[5,0]],
+          values=math_ops.cast([3,3,3,4,4,1], dtypes.int64),
+          dense_shape=[6, 1])
+      emb1 = embedding_ops.embedding_lookup_sparse(var, sp1, None)
+      emb2 = embedding_ops.embedding_lookup_sparse(var, sp2, None)
+      emb = emb1 + emb2
+      fun = math_ops.multiply(emb, 2.0, name='multiply')
+      loss = math_ops.reduce_sum(fun, name='reduce_sum')
+      gs = training_util.get_or_create_global_step()
+      opt = gradient_descent.GradientDescentOptimizer(0.1)
+      g_v = opt.compute_gradients(loss)
+      train_op = opt.apply_gradients(g_v)
+      saver = saver_module.Saver()
+      init = variables.global_variables_initializer()
+      with self.test_session(graph=g) as sess:
+        sess.run([init])
+        sess.run(train_op)
+        saver.save(sess, ckpt_path)
+
+    for name, shape in checkpoint_utils.list_variables(ckpt_path):
+      if name == "var_1-freqs":
+        value = checkpoint_utils.load_variable(ckpt_path, name)
+        self.assertAllEqual(value, [3, 3, 1, 3, 2])
+
+    del os.environ["TF_RECORD_FREQ"]
+
 if __name__ == "__main__":
   googletest.main()
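Note: both tests expect the checkpoint tensor "var_1-freqs" to equal [3, 3, 1, 3, 2], which is simply the number of times each id occurs across the two lookups (sp1 feeds ids [0, 0, 0, 1, 1, 2]; sp2 feeds [3, 3, 3, 4, 4, 1]). A quick stand-alone sanity check of that expectation, assuming the freqs tensor lists frequencies in ascending id order (which the assertion implies):

    from collections import Counter

    # Ids fed through sp1 and sp2 in the tests above.
    freqs = Counter([0, 0, 0, 1, 1, 2] + [3, 3, 3, 4, 4, 1])
    print([freqs[i] for i in sorted(freqs)])  # [3, 3, 1, 3, 2]
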
diff --git a/tensorflow/python/ops/kv_variable_ops.py b/tensorflow/python/ops/kv_variable_ops.py
index 701c03f6975..96329ca345b 100644
--- a/tensorflow/python/ops/kv_variable_ops.py
+++ b/tensorflow/python/ops/kv_variable_ops.py
@@ -368,7 +368,8 @@ def _init_from_args(self,
     self._dtype = initial_value.dtype.base_dtype
     self._constraint = constraint
     self._gather_op = None
-    self._counts_tensor = None
+    # Keyed by the ids tensor passed to sparse_read; one entry per lookup.
+    self._counts_tensor = {}
     if self._is_primary:
       self._slot_num = 0
     else:
@@ -850,7 +850,7 @@ def sparse_read(self, indices, name=None, ev_init_value=None, counts=None):
           default_value,
           counts,
           is_inference=True, name=name)
-      self._counts_tensor = counts
+      self._counts_tensor[indices] = counts
     else:
       value = gen_kv_variable_ops.kv_resource_gather(self._handle,
                                                      indices,
diff --git a/tensorflow/python/training/gradient_descent.py b/tensorflow/python/training/gradient_descent.py
index 32a12a0554f..799e3c5f5bd 100644
--- a/tensorflow/python/training/gradient_descent.py
+++ b/tensorflow/python/training/gradient_descent.py
@@ -71,12 +71,28 @@ def _resource_apply_dense(self, grad, handle):
   def _resource_apply_sparse_duplicate_indices(self, grad, handle, indices):
     if isinstance(handle, kv_variable_ops.EmbeddingVariable):
       global_step = training_util.get_or_create_global_step()
-      if handle.need_counts() and handle._counts_tensor is not None:
+      if handle.need_counts() and handle._counts_tensor:
+        # Recover the counts recorded by sparse_read: the gradient's indices
+        # are a Reshape of the original ids, or a ConcatV2 of such Reshapes.
+        if indices.op.type == "ConcatV2":
+          from tensorflow.python.ops import array_ops
+          total_counts = []
+          for tensor in indices.op.inputs:
+            if tensor.op.type == "Reshape":
+              indices_tensor = tensor.op.inputs[0]
+              total_counts.append(handle._counts_tensor[indices_tensor])
+          counts_tensor = array_ops.concat(total_counts, 0)
+        elif indices.op.type == "Reshape":
+          indices_tensor = indices.op.inputs[0]
+          counts_tensor = handle._counts_tensor[indices_tensor]
+        else:
+          # Fall back to a direct lookup so counts_tensor is always bound.
+          counts_tensor = handle._counts_tensor[indices]
         return training_ops.kv_resource_sparse_apply_gradient_descent_with_counts(
             handle.handle, math_ops.cast(self._learning_rate_tensor,
                                          grad.dtype.base_dtype),
             grad, indices, global_step,
-            handle._counts_tensor, use_locking=self._use_locking)
+            counts_tensor, use_locking=self._use_locking)
       else:
         return training_ops.kv_resource_sparse_apply_gradient_descent(
             handle.handle, math_ops.cast(self._learning_rate_tensor,
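Note: the counts recorded in sparse_read are keyed by the exact ids tensor each lookup passed in, while the optimizer sees gradient indices that TF's gather gradient has wrapped in a Reshape, or, when the same variable is looked up more than once (emb1 + emb2 in the tests), a ConcatV2 of such Reshapes plus the axis constant. The branch above therefore walks one or two ops backwards to recover the original keys. A condensed sketch of that lookup, with a hypothetical helper name and the dict passed in explicitly:

    from tensorflow.python.ops import array_ops

    def _counts_for_gradient_indices(counts_by_ids, indices):
      """Sketch: map a gradient's `indices` back to recorded counts.

      `counts_by_ids` stands in for handle._counts_tensor, keyed by the
      ids tensors originally passed to sparse_read.
      """
      if indices.op.type == "ConcatV2":
        # Aggregated gradient from several lookups: concat their counts
        # in the same order (the non-Reshape input is the concat axis).
        parts = [counts_by_ids[t.op.inputs[0]]
                 for t in indices.op.inputs if t.op.type == "Reshape"]
        return array_ops.concat(parts, 0)
      if indices.op.type == "Reshape":
        # Single lookup: unwrap the Reshape added by the gather gradient.
        return counts_by_ids[indices.op.inputs[0]]
      return counts_by_ids[indices]  # ids tensor used directly
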
diff --git a/tensorflow/python/training/optimizer.py b/tensorflow/python/training/optimizer.py
index 578d682cc11..7523604ccf9 100644
--- a/tensorflow/python/training/optimizer.py
+++ b/tensorflow/python/training/optimizer.py
@@ -93,6 +93,18 @@ def _deduplicate_indexed_slices_with_counts(values, indices):
       array_ops.shape(unique_indices)[0])
   return (summed_values, unique_indices, indices_counts)
 
+def _deduplicate_indexed_slices_with_counts_reduction(values, indices, counts):
+  """Sums `values` associated with any non-unique `indices`,
+  summing the matching `counts` in the same way."""
+  unique_indices, new_index_positions = array_ops.unique(indices)
+  summed_values = math_ops.unsorted_segment_sum(
+      values, new_index_positions,
+      array_ops.shape(unique_indices)[0])
+  summed_counts = math_ops.unsorted_segment_sum(
+      counts, new_index_positions,
+      array_ops.shape(unique_indices)[0])
+  return (summed_values, unique_indices, summed_counts)
+
 def _var_key(var):
   # TODO(ashankar): Consolidate handling for eager and graph
   if hasattr(var, "op"):
@@ -1088,14 +1100,27 @@ def _resource_apply_sparse_duplicate_indices(self, grad, handle, indices):
     """
    from tensorflow.python.ops import kv_variable_ops
     if isinstance(handle, kv_variable_ops.EmbeddingVariable) and handle.need_counts():
-      if handle._counts_tensor is None:
+      if not handle._counts_tensor:
        summed_grad, unique_indices, indices_counts = \
            _deduplicate_indexed_slices_with_counts(
                values=grad, indices=indices)
       else:
-        summed_grad, unique_indices = _deduplicate_indexed_slices(
-            values=grad, indices=indices)
-        indices_counts = handle._counts_tensor
+        if indices.op.type == "ConcatV2":
+          total_counts = []
+          for tensor in indices.op.inputs:
+            if tensor.op.type == "Reshape":
+              indices_tensor = tensor.op.inputs[0]
+              total_counts.append(handle._counts_tensor[indices_tensor])
+          counts_tensor = array_ops.concat(total_counts, 0)
+        elif indices.op.type == "Reshape":
+          indices_tensor = indices.op.inputs[0]
+          counts_tensor = handle._counts_tensor[indices_tensor]
+        else:
+          # Fall back to a direct lookup so counts_tensor is always bound.
+          counts_tensor = handle._counts_tensor[indices]
+        summed_grad, unique_indices, indices_counts = \
+            _deduplicate_indexed_slices_with_counts_reduction(
+                grad, indices, counts_tensor)
       return self._resource_apply_sparse(
           summed_grad, handle, unique_indices, indices_counts)
     else:
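Note: _deduplicate_indexed_slices_with_counts_reduction sums the counts with the same segment mapping used to sum duplicate gradient rows, so each unique id keeps its total recorded frequency. A toy check of that reduction; a minimal sketch assuming a TF 1.x session to match this codebase:

    import tensorflow as tf

    indices = tf.constant([5, 7, 5], dtype=tf.int64)
    counts = tf.constant([2., 1., 3.])
    unique_indices, positions = tf.unique(indices)
    summed_counts = tf.math.unsorted_segment_sum(
        counts, positions, tf.shape(unique_indices)[0])
    with tf.Session() as sess:
      # Duplicate id 5 keeps its combined count: [5 7], [5. 1.]
      print(sess.run([unique_indices, summed_counts]))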