Skip to content

Commit 477d1af

Browse files
author
Hendrik van Antwerpen
committed
Add count_till_limit on Tokenizer
1 parent e20fc1a commit 477d1af

File tree

1 file changed

+9
-0
lines changed

1 file changed

+9
-0
lines changed

crates/bpe-openai/src/lib.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,15 @@ impl Tokenizer {
8686
.sum()
8787
}
8888

89+
pub fn count_till_limit(&self, text: &str, token_limit: usize) -> Option<usize> {
90+
self.split(text)
91+
.try_fold(token_limit, |token_limit, piece| {
92+
self.bpe
93+
.count_till_limit(piece.as_bytes(), token_limit)
94+
.map(|piece_count| token_limit - piece_count)
95+
})
96+
}
97+
8998
pub fn encode(&self, text: &str) -> Vec<u32> {
9099
self.split(text)
91100
.flat_map(|piece| self.bpe.encode_via_backtracking(piece.as_bytes()))

0 commit comments

Comments (0)