Skip to content
This repository has been archived by the owner on Jul 7, 2023. It is now read-only.

Commit

Permalink
Standardized EOS token
Browse files Browse the repository at this point in the history
  • Loading branch information
vthorsteinsson committed Jul 14, 2017
1 parent 27c6185 commit 7bf4936
Showing 1 changed file with 3 additions and 4 deletions.
7 changes: 3 additions & 4 deletions tensor2tensor/data_generators/wmt.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,8 @@
FLAGS = tf.flags.FLAGS


- # End-of-sentence marker (should correspond to the position of EOS in the
- # RESERVED_TOKENS list in text_encoder.py)
- EOS = 1
+ # End-of-sentence marker
+ EOS = text_encoder.EOS_TOKEN


def character_generator(source_path, target_path, character_vocab, eos=None):
Expand Down Expand Up @@ -183,7 +182,7 @@ def ende_bpe_token_generator(tmp_dir, train):
train_path = _get_wmt_ende_dataset(tmp_dir, dataset_path)
token_path = os.path.join(tmp_dir, "vocab.bpe.32000")
token_vocab = text_encoder.TokenTextEncoder(vocab_filename=token_path)
- return token_generator(train_path + ".en", train_path + ".de", token_vocab, 1)
+ return token_generator(train_path + ".en", train_path + ".de", token_vocab, EOS)


_ENDE_TRAIN_DATASETS = [
Expand Down

0 comments on commit 7bf4936

Please sign in to comment.