Skip to content

Commit 56214ef

Browse files
committed
fix count_till_limit function
1 parent 6e03fd0 commit 56214ef

File tree

1 file changed

+11
-3
lines changed

1 file changed

+11
-3
lines changed

crates/bpe-openai/src/lib.rs

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -93,10 +93,10 @@ impl Tokenizer {
9393
/// token limit is much smaller than the provided text. Applies pre-tokenization before counting.
9494
pub fn count_till_limit(&self, text: &str, token_limit: usize) -> Option<usize> {
9595
self.split(text)
96-
.try_fold(token_limit, |token_limit, piece| {
96+
.try_fold(0, |consumed, piece| {
9797
self.bpe
98-
.count_till_limit(piece.as_bytes(), token_limit)
99-
.map(|piece_count| token_limit - piece_count)
98+
.count_till_limit(piece.as_bytes(), token_limit - consumed)
99+
.map(|piece_count| consumed + piece_count)
100100
})
101101
}
102102

@@ -231,4 +231,12 @@ mod tests {
231231
}
232232
}
233233
}
234+
235+
#[test]
236+
fn test_count_till_limit() {
237+
assert_eq!(cl100k_base().count_till_limit("abc", 3), Some(1));
238+
assert_eq!(cl100k_base().count_till_limit("abcabc", 3), Some(2));
239+
assert_eq!(cl100k_base().count_till_limit("abcabcabc", 3), Some(3));
240+
assert_eq!(cl100k_base().count_till_limit("abcabcabcabc", 3), None);
241+
}
234242
}

0 commit comments

Comments (0)