Skip to content

Commit 91ef54d

Browse files
committed
rename
1 parent d913518 commit 91ef54d

File tree

4 files changed

+17
-17
lines changed

4 files changed

+17
-17
lines changed
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
[package]
22
authors = ["The blackbird team <support@github.com>"]
33
edition = "2021"
4-
name = "utf8-converter"
4+
name = "string-offsets"
55
version = "0.1.0"
66

77
[dependencies]
File renamed without changes.
Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ use bitrank::{BitRank, BitRankBuilder};
1111
///
1212
/// Rust strings are UTF-8, but JavaScript has UTF-16 strings, while in Python, strings are
1313
/// sequences of Unicode code points. It's therefore necessary to adjust string positions when
14-
/// communicating across programming language boundaries. [`Utf8Converter`] does these adjustments.
14+
/// communicating across programming language boundaries. [`StringOffsets`] does these adjustments.
1515
///
1616
/// ## Converting offsets
1717
///
@@ -24,7 +24,7 @@ use bitrank::{BitRank, BitRankBuilder};
2424
/// - `utf16_pos` - Zero-based line number and `utf16` offset within the line.
2525
/// - `char_pos` - Zero-based line number and `char` offset within the line.
2626
///
27-
/// For example, [`Utf8Converter::utf8_to_utf16`] converts a Rust byte offset to a number that will
27+
/// For example, [`StringOffsets::utf8_to_utf16`] converts a Rust byte offset to a number that will
2828
/// index to the same position in a JavaScript string. Offsets are expressed as `u32` or [`Pos`]
2929
/// values.
3030
///
@@ -46,14 +46,14 @@ use bitrank::{BitRank, BitRankBuilder};
4646
/// When mapping offsets to line ranges, it is important to use a `_to_lines` function in order to
4747
/// end up with the correct line range. We have these methods because if you tried to do it
4848
/// yourself you would screw it up; use them! (And see the source code for
49-
/// [`Utf8Converter::utf8s_to_lines`] if you don't believe us.)
49+
/// [`StringOffsets::utf8s_to_lines`] if you don't believe us.)
5050
///
5151
/// ## Complexity
5252
///
5353
/// Most operations run in O(1) time; some require O(log n) time. The memory consumed by this data
5454
/// structure is typically less than the memory occupied by the actual content. In the best case,
5555
/// it requires ~25% of the content space.
56-
pub struct Utf8Converter {
56+
pub struct StringOffsets {
5757
// For every line, stores the byte position at which that line starts.
5858
line_begins: Vec<u32>,
5959

@@ -79,7 +79,7 @@ pub struct Pos {
7979
/// Zero-indexed line number.
8080
pub line: u32,
8181
/// Zero-indexed column number. The units of this field depend on the method that produces the
82-
/// value. See [`Utf8Converter::utf8_to_char_pos`], [`Utf8Converter::utf8_to_utf16_pos`].
82+
/// value. See [`StringOffsets::utf8_to_char_pos`], [`StringOffsets::utf8_to_utf16_pos`].
8383
pub col: u32,
8484
}
8585

@@ -101,15 +101,15 @@ pub struct Pos {
101101
// Question: Consider whether we should return an empty line range in this case which would
102102
// probably be consistent from a mathematical point of view. But then we should also return empty
103103
// line ranges for empty character ranges in the middle of a line...
104-
impl Utf8Converter {
104+
impl StringOffsets {
105105
/// Collects position information for the given string.
106106
pub fn new(content: &str) -> Self {
107107
new_converter(content.as_bytes())
108108
}
109109

110110
/// Collects position information for a byte-string.
111111
///
112-
/// If `content` is UTF-8, this is just like [`Utf8Converter::new`]. Otherwise, the
112+
/// If `content` is UTF-8, this is just like [`StringOffsets::new`]. Otherwise, the
113113
/// conversion methods involving characters will produce unspecified (but memory-safe) results.
114114
pub fn from_bytes(content: &[u8]) -> Self {
115115
new_converter(content)
@@ -293,7 +293,7 @@ impl Utf8Converter {
293293
}
294294
}
295295

296-
fn new_converter(content: &[u8]) -> Utf8Converter {
296+
fn new_converter(content: &[u8]) -> StringOffsets {
297297
let mut utf8_builder = BitRankBuilder::new();
298298
let mut utf16_builder = BitRankBuilder::new();
299299
let mut line_builder = BitRankBuilder::new();
@@ -334,7 +334,7 @@ fn new_converter(content: &[u8]) -> Utf8Converter {
334334
line_builder.push(content.len() - 1);
335335
}
336336

337-
Utf8Converter {
337+
StringOffsets {
338338
line_begins,
339339
utf8_to_line: line_builder.finish(),
340340
whitespace_only,
@@ -378,7 +378,7 @@ fn utf8_to_utf16_width(content: &[u8]) -> usize {
378378
#[cfg(test)]
379379
mod test {
380380
use super::is_char_boundary;
381-
use crate::{utf8_to_utf16_width, utf8_width, Pos, Utf8Converter};
381+
use crate::{utf8_to_utf16_width, utf8_width, Pos, StringOffsets};
382382

383383
#[test]
384384
fn test_utf8_char_width() {
@@ -417,7 +417,7 @@ mod test {
417417
let content = r#"a short line.
418418
followed by another one.
419419
no terminating newline!"#;
420-
let lines = Utf8Converter::new(content);
420+
let lines = StringOffsets::new(content);
421421
assert_eq!(lines.line_to_utf8s(0), 0..14);
422422
assert_eq!(&content[0..14], "a short line.\n");
423423
assert_eq!(lines.line_to_utf8s(1), 14..39);
@@ -463,7 +463,7 @@ no terminating newline!"#;
463463
fn test_convert_ascii() {
464464
let content = r#"line0
465465
line1"#;
466-
let lines = Utf8Converter::new(content);
466+
let lines = StringOffsets::new(content);
467467
assert_eq!(lines.utf8_to_char_pos(0), pos(0, 0));
468468
assert_eq!(lines.utf8_to_char_pos(1), pos(0, 1));
469469
assert_eq!(lines.utf8_to_char_pos(6), pos(1, 0));
@@ -476,7 +476,7 @@ line1"#;
476476
let content = r#"❤️ line0
477477
line1
478478
✅ line2"#;
479-
let lines = Utf8Converter::new(content);
479+
let lines = StringOffsets::new(content);
480480
assert_eq!(lines.utf8_to_char_pos(0), pos(0, 0)); // ❤️ takes 6 bytes to represent in utf8 (2 code points)
481481
assert_eq!(lines.utf8_to_char_pos(1), pos(0, 0));
482482
assert_eq!(lines.utf8_to_char_pos(2), pos(0, 0));
@@ -507,7 +507,7 @@ line1
507507
fn test_small() {
508508
// Á - 2 bytes utf8
509509
let content = r#"❤️ line0 ❤️Á 👋"#;
510-
let lines = Utf8Converter::new(content);
510+
let lines = StringOffsets::new(content);
511511
let mut utf16_index = 0;
512512
let mut char_index = 0;
513513
for (byte_index, char) in content.char_indices() {
@@ -527,7 +527,7 @@ line1
527527
// ^~~~ utf8: 1 char, 1 byte, utf16: 1 code unit
528528
// ^~~~~ utf8: 1 char, 2 bytes, utf16: 1 code unit
529529
// ^~~~~~ utf8: 2 chars, 3 bytes each, utf16: 2 code units
530-
let lines = Utf8Converter::new(content);
530+
let lines = StringOffsets::new(content);
531531

532532
// UTF-16 positions
533533
assert_eq!(lines.utf8_to_utf16_pos(0), pos(0, 0)); // ❤️
@@ -573,7 +573,7 @@ line1
573573
#[test]
574574
fn test_critical_input_len() {
575575
let content = [b'a'; 16384];
576-
let lines = Utf8Converter::from_bytes(&content);
576+
let lines = StringOffsets::from_bytes(&content);
577577
assert_eq!(lines.utf8_to_utf16_pos(16384), pos(1, 0));
578578
}
579579
}

0 commit comments

Comments
 (0)