diff --git a/tensor2tensor/models/transformer.py b/tensor2tensor/models/transformer.py
index 1220aa197..c4c2df86b 100644
--- a/tensor2tensor/models/transformer.py
+++ b/tensor2tensor/models/transformer.py
@@ -225,6 +225,13 @@ def _beam_decode(self, features, decode_length, beam_size, top_beams, alpha):
           None if using greedy decoding (beam_size=1)
       }
     """
+    if self._hparams.self_attention_type != "dot_product":
+      # Caching is not guaranteed to work with attention types other than
+      # dot_product.
+      # TODO(petershaw): Support fast decoding when using relative
+      # position representations, i.e. "dot_product_relative" attention.
+      return self._beam_decode_slow(features, decode_length, beam_size,
+                                    top_beams, alpha)
     with tf.variable_scope(self.name):
       return self._fast_decode(
           features, decode_length, beam_size, top_beams, alpha)
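
For context, a minimal usage sketch of the dispatch this guard introduces. `transformer_base` and the `self_attention_type` hparam are real tensor2tensor names, but the default value noted below is an assumption about the version at the time of this change and may differ across releases:

    from tensor2tensor.models import transformer

    # Base hyperparameters; self_attention_type defaults to "dot_product",
    # so _beam_decode dispatches to the fast, cached decoder as before.
    hparams = transformer.transformer_base()

    # Switching to relative-position attention now routes beam search
    # through _beam_decode_slow instead of using a cache that does not
    # account for relative position representations.
    hparams.self_attention_type = "dot_product_relative"

The design choice here is a conservative fallback: rather than trying to make the decoding cache aware of relative positions, any non-`dot_product` attention type takes the slow but correct path, with the TODO marking fast relative-attention decoding as future work.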