Merged
Changes from all commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Failed to load files.
Fix typo that made the tgt tensor data wrong
  • Loading branch information
@zwzmzd
zwzmzd committed Jun 14, 2025
commit a81359fddd173962957dd782f4530bacc619a3c7
Original file line number | Diff line number | Diff line change
Expand Up @@ -42,14 +42,14 @@ def get_data(opts):

# Build a vocabulary object for these languages
src_vocab = build_vocab_from_iterator(
_yield_tokens(train_iterator, src_tokenizer, src_lang),
_yield_tokens(train_iterator, src_tokenizer, True),
min_freq=1,
specials=list(special_symbols.keys()),
special_first=True
)

tgt_vocab = build_vocab_from_iterator(
_yield_tokens(train_iterator, tgt_tokenizer, tgt_lang),
_yield_tokens(train_iterator, tgt_tokenizer, False),
min_freq=1,
specials=list(special_symbols.keys()),
special_first=True
Expand Down