Skip to content

Commit a10cce2

Browse files
committed
check single-len tokens as well
1 parent 08d4200 commit a10cce2

File tree

1 file changed

+1
-1
lines changed

1 file changed

+1
-1
lines changed

crates/bpe/src/byte_pair_encoding.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -604,7 +604,7 @@ impl BytePairEncoding {
604 604   best = (m.value(), 1);
605 605   break;
606 606   } else if (last_token[m.start() - 1].1 + 1 < best.1)
607     - & !(forbidden_tokens_set.contains(&m.value()))
    607 + & (!(forbidden_tokens_set.contains(&m.value())) | ((m.end() - m.start()) == 1))
608 608   {
609 609   best = (m.value(), last_token[m.start() - 1].1 + 1);
610 610   }

0 commit comments

Comments (0)