Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion crates/bpe/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ aneubeck-daachorse = "1.1.1"
base64 = { version = "0.22", optional = true }
fnv = "1.0"
itertools = "0.14"
rand = { version = "0.8", optional = true }
rand = { version = "0.9", optional = true }
serde = { version = "1", features = ["derive"] }

[dev-dependencies]
Expand Down
2 changes: 1 addition & 1 deletion crates/bpe/benchmarks/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,6 @@ test = true
bpe = { path = "../../bpe" }
bpe-openai = { path = "../../bpe-openai" }
criterion = "0.5"
rand = "0.8"
rand = "0.9"
tiktoken-rs = "0.6"
tokenizers = { version = "0.21", features = ["http"] }
16 changes: 8 additions & 8 deletions crates/bpe/src/byte_pair_encoding.rs
Original file line number Diff line number Diff line change
Expand Up @@ -171,9 +171,9 @@ pub fn find_hash_factor_for_dictionary(tokens: impl IntoIterator<Item = Vec<u8>>
use rand::Rng;

let all_tokens = tokens.into_iter().collect_vec();
let mut rnd = rand::thread_rng();
let mut rnd = rand::rng();
loop {
let factor: u64 = rnd.gen();
let factor: u64 = rnd.random();
let mut seen = HashSet::new();
if all_tokens
.iter()
Expand Down Expand Up @@ -568,7 +568,7 @@ pub fn create_test_string_with_predicate(
min_bytes: usize,
predicate: impl Fn(&str) -> bool,
) -> String {
use rand::{thread_rng, Rng};
use rand::{rng, Rng};
// the string we accumulated thus far
let mut result = String::new();
// the tokens we added so we can backtrack
Expand All @@ -577,7 +577,7 @@ pub fn create_test_string_with_predicate(
// try a few times to find a suitable token
'next: for _ in 0..8 {
// pick a random token and provisionally add it
let i = thread_rng().gen_range(0..bpe.num_tokens()) as u32;
let i = rng().random_range(0..bpe.num_tokens()) as u32;
// We only use tokens that are valid UTF-8. This is true for ~99% of tokens in OpenAI's
// token set. The chance of constructing a valid UTF-8 character across a token boundary
// by picking random tokens is so small that it is unlikely to happen anyway.
Expand All @@ -603,8 +603,8 @@ pub fn create_test_string_with_predicate(

#[cfg(feature = "rand")]
pub fn select_test_string(text: &str, min_bytes: usize) -> &str {
use rand::{thread_rng, Rng};
let mut start = thread_rng().gen_range(0..text.len() - min_bytes);
use rand::{rng, Rng};
let mut start = rng().random_range(0..text.len() - min_bytes);
while !text.is_char_boundary(start) {
start -= 1;
}
Expand All @@ -618,10 +618,10 @@ pub fn select_test_string(text: &str, min_bytes: usize) -> &str {
/// Generate test bytes by concatenating random tokens.
#[cfg(feature = "rand")]
pub fn create_test_bytes(bpe: &BytePairEncoding, min_bytes: usize) -> Vec<u8> {
use rand::{thread_rng, Rng};
use rand::{rng, Rng};
let mut result = Vec::new();
while result.len() < min_bytes {
let i = thread_rng().gen_range(0..bpe.num_tokens());
let i = rng().random_range(0..bpe.num_tokens());
result.extend(bpe.token_bytes(i as u32));
}
result
Expand Down
2 changes: 1 addition & 1 deletion crates/bpe/tests/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,5 @@ edition = "2021"
bpe = { path = "../../bpe", features = ["rand"] }
bpe-openai = { path = "../../bpe-openai" }
itertools = "0.14"
rand = "0.8"
rand = "0.9"
tiktoken-rs = "0.6"
6 changes: 3 additions & 3 deletions crates/bpe/tests/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#[cfg(test)]
mod tests {
use itertools::Itertools;
use rand::{thread_rng, Rng};
use rand::{rng, Rng};
use tiktoken_rs::cl100k_base_singleton;

use bpe::appendable_encoder::AppendableEncoder;
Expand Down Expand Up @@ -122,8 +122,8 @@ mod tests {
let input = create_test_bytes(bpe, 10000);
let intervals = IntervalEncoding::new(bpe, &input);
for _ in 0..1000 {
let start = thread_rng().gen_range(0..input.len());
let end = thread_rng().gen_range(0..input.len());
let start = rng().random_range(0..input.len());
let end = rng().random_range(0..input.len());
let range = start.min(end)..start.max(end);
assert_eq!(
intervals.count(range.clone()),
Expand Down
6 changes: 3 additions & 3 deletions crates/geo_filters/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,15 +28,15 @@ fnv = "1.0"
hyperloglogplus = { version = "0.4", optional = true }
itertools = "0.14"
once_cell = "1.18"
rand = { version = "0.8", optional = true }
rand = { version = "0.9", optional = true }
rayon = { version = "1.7", optional = true }
regex = { version = "1", optional = true }

[dev-dependencies]
criterion = "0.5"
geo_filters = { path = ".", features = ["evaluation"] }
rand = "0.8"
rand_chacha = "0.3"
rand = "0.9"
rand_chacha = "0.9"
rayon = "1.7"

[[bench]]
Expand Down
2 changes: 1 addition & 1 deletion crates/geo_filters/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -312,7 +312,7 @@ pub(crate) mod tests {

/// Runs estimation trials and returns the average precision and variance.
pub(crate) fn test_estimate<M: Method, C: Count<M>>(f: impl Fn() -> C) -> (f32, f32) {
let mut rnd = rand::rngs::StdRng::from_entropy();
let mut rnd = rand::rngs::StdRng::from_os_rng();
let cnt = 10000usize;
let mut avg_precision = 0.0;
let mut avg_var = 0.0;
Expand Down
2 changes: 1 addition & 1 deletion crates/geo_filters/src/config/lookup.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ mod tests {
let phi = phi_f64(B);
let buckets = HashToBucketLookup::new(B);
let mut var = 0.0;
let mut rnd = rand::rngs::StdRng::from_entropy();
let mut rnd = rand::rngs::StdRng::from_os_rng();
for _ in 0..n {
let hash = rnd.next_u64();
let estimate = buckets.lookup(hash) as f64;
Expand Down
6 changes: 3 additions & 3 deletions crates/geo_filters/src/diff_count.rs
Original file line number Diff line number Diff line change
Expand Up @@ -446,7 +446,7 @@ mod tests {

#[test]
fn test_estimate_diff_size_fast() {
let mut rnd = rand::rngs::StdRng::from_entropy();
let mut rnd = rand::rngs::StdRng::from_os_rng();
let mut a_p = GeoDiffCount7_50::default();
let mut a_hp = GeoDiffCount7::default();
let mut b_p = GeoDiffCount7_50::default();
Expand Down Expand Up @@ -500,7 +500,7 @@ mod tests {

#[test]
fn test_xor_plus_mask() {
let mut rnd = rand::rngs::StdRng::from_entropy();
let mut rnd = rand::rngs::StdRng::from_os_rng();
let mask_size = 12;
let mask = 0b100001100000;
let mut a = GeoDiffCount7::default();
Expand All @@ -527,7 +527,7 @@ mod tests {

#[test]
fn test_bit_chunks() {
let mut rnd = rand::rngs::StdRng::from_entropy();
let mut rnd = rand::rngs::StdRng::from_os_rng();
for _ in 0..100 {
let mut expected = GeoDiffCount7::default();
for _ in 0..1000 {
Expand Down
4 changes: 2 additions & 2 deletions crates/geo_filters/src/distinct_count.rs
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,7 @@ mod tests {

#[test]
fn test_estimate_union_size_fast() {
let mut rnd = rand::rngs::StdRng::from_entropy();
let mut rnd = rand::rngs::StdRng::from_os_rng();
let mut a = GeoDistinctCount7::default();
let mut b = GeoDistinctCount7::default();
for _ in 0..10000 {
Expand Down Expand Up @@ -374,7 +374,7 @@ mod tests {

#[test]
fn test_bit_chunks() {
let mut rnd = rand::rngs::StdRng::from_entropy();
let mut rnd = rand::rngs::StdRng::from_os_rng();
for _ in 0..100 {
let mut expected = GeoDistinctCount7::default();
for _ in 0..1000 {
Expand Down
2 changes: 1 addition & 1 deletion crates/geo_filters/src/evaluation/simulation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ pub fn simulate<F: Fn() -> Box<dyn SimulationCount> + Send + Sync>(
.map(|_| {
let mut t = f();
let mut last_set_size = 0;
let mut rnd = rand::rngs::StdRng::from_entropy();
let mut rnd = rand::rngs::StdRng::from_os_rng();
set_sizes
.iter()
.map(move |set_size| {
Expand Down
4 changes: 2 additions & 2 deletions crates/string-offsets/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,5 @@ keywords = ["unicode", "positions", "utf16", "characters", "lines"]
categories = ["algorithms", "data-structures", "text-processing", "development-tools::ffi"]

[dev-dependencies]
rand = "0.8"
rand_chacha = "0.3"
rand = "0.9"
rand_chacha = "0.9"
8 changes: 4 additions & 4 deletions crates/string-offsets/src/bitrank.rs
Original file line number Diff line number Diff line change
Expand Up @@ -214,12 +214,12 @@ impl BitRank {

#[cfg(test)]
mod tests {
use rand::distributions::Uniform;
use super::*;
use rand::distr::Uniform;
use rand::prelude::*;
use rand_chacha::rand_core::SeedableRng;
use rand_chacha::ChaCha8Rng;

use super::*;

/// Creates a `BitRank` containing the integers in `iter` (which should be strictly
/// increasing).
pub fn bitrank<I: IntoIterator<Item = usize>>(iter: I) -> BitRank {
Expand Down Expand Up @@ -304,7 +304,7 @@ mod tests {
#[test]
fn test_rank_large_random() {
let mut rng = ChaCha8Rng::seed_from_u64(2);
let uniform = Uniform::<usize>::from(0..1_000_000);
let uniform = Uniform::new(0, 1_000_000).unwrap();
let mut random_bits = Vec::with_capacity(100_000);
for _ in 0..100_000 {
random_bits.push(uniform.sample(&mut rng));
Expand Down