File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -7,6 +7,8 @@ For convenience it re-exports the `bpe` crate so that depending on this crate i
 7  7
 8  8   Supported token sets:
 9  9
   10 + - r50k
   11 + - p50k
10 12   - cl100k
11 13   - o200k
12 14
Original file line number Diff line number Diff line change @@ -7,6 +7,24 @@ use serde::Serialize;
 7  7   use tiktoken_rs::CoreBPE;
 8  8
 9  9   fn main() {
   10 +     serialize_tokens(
   11 +         "r50k",
   12 +         &tiktoken_rs::r50k_base().expect("tiktoken initialization must not fail!"),
   13 +         50256,
   14 +         1,
   15 +     );
   16 +     serialize_tokens(
   17 +         "p50k",
   18 +         &tiktoken_rs::p50k_base().expect("tiktoken initialization must not fail!"),
   19 +         50280,
   20 +         1,
   21 +     );
   22 +     serialize_tokens(
   23 +         "cl100k",
   24 +         &tiktoken_rs::cl100k_base().expect("tiktoken initialization must not fail!"),
   25 +         100256,
   26 +         17846336922010275747,
   27 +     );
10 28       serialize_tokens(
11 29           "cl100k",
12 30           &tiktoken_rs::cl100k_base().expect("tiktoken initialization must not fail!"),
Original file line number Diff line number Diff line change @@ -2,6 +2,16 @@ use std::sync::LazyLock;
 2  2
 3  3   use bpe::byte_pair_encoding::BytePairEncoding;
 4  4
    5 + static BPE_R50K: LazyLock<BytePairEncoding> = LazyLock::new(|| {
    6 +     let bytes = include_bytes!(concat!(env!("OUT_DIR"), "/bpe_r50k.dict"));
    7 +     rmp_serde::from_slice(bytes).expect("")
    8 + });
    9 +
   10 + static BPE_P50K: LazyLock<BytePairEncoding> = LazyLock::new(|| {
   11 +     let bytes = include_bytes!(concat!(env!("OUT_DIR"), "/bpe_p50k.dict"));
   12 +     rmp_serde::from_slice(bytes).expect("")
   13 + });
   14 +
 5 15   static BPE_CL100K: LazyLock<BytePairEncoding> = LazyLock::new(|| {
 6 16       let bytes = include_bytes!(concat!(env!("OUT_DIR"), "/bpe_cl100k.dict"));
 7 17       rmp_serde::from_slice(bytes).expect("")
@@ -14,6 +24,14 @@ static BPE_O200K: LazyLock<BytePairEncoding> = LazyLock::new(|| {
14 24
15 25   pub use bpe::*;
16 26
   27 + pub fn r50k() -> &'static BytePairEncoding {
   28 +     &BPE_R50K
   29 + }
   30 +
   31 + pub fn p50k() -> &'static BytePairEncoding {
   32 +     &BPE_P50K
   33 + }
   34 +
17 35   pub fn cl100k() -> &'static BytePairEncoding {
18 36       &BPE_CL100K
19 37   }
@@ -26,6 +44,16 @@ pub fn o200k() -> &'static BytePairEncoding {
26 44   mod tests {
27 45       use super::*;
28 46
   47 +     #[test]
   48 +     fn can_load_r50k() {
   49 +         r50k().count("".as_bytes());
   50 +     }
   51 +
   52 +     #[test]
   53 +     fn can_load_p50k() {
   54 +         p50k().count("".as_bytes());
   55 +     }
   56 +
29 57       #[test]
30 58       fn can_load_cl100k() {
31 59           cl100k().count("".as_bytes());
You can’t perform that action at this time.
0 commit comments