@@ -11,7 +11,7 @@ use bitrank::{BitRank, BitRankBuilder};
1111///
1212/// Rust strings are UTF-8, but JavaScript has UTF-16 strings, while in Python, strings are
1313/// sequences of Unicode code points. It's therefore necessary to adjust string positions when
14- /// communicating across programming language boundaries. [`Utf8Converter `] does these adjustments.
14+ /// communicating across programming language boundaries. [`StringOffsets`] does these adjustments.
1515///
1616/// ## Converting offsets
1717///
@@ -24,7 +24,7 @@ use bitrank::{BitRank, BitRankBuilder};
2424/// - `utf16_pos` - Zero-based line number and `utf16` offset within the line.
2525/// - `char_pos` - Zero-based line number and `char` offset within the line.
2626///
27- /// For example, [`Utf8Converter ::utf8_to_utf16`] converts a Rust byte offset to a number that will
27+ /// For example, [`StringOffsets::utf8_to_utf16`] converts a Rust byte offset to a number that will
2828/// index to the same position in a JavaScript string. Offsets are expressed as `u32` or [`Pos`]
2929/// values.
3030///
@@ -46,14 +46,14 @@ use bitrank::{BitRank, BitRankBuilder};
4646/// When mapping offsets to line ranges, it is important to use a `_to_lines` function in order to
4747/// end up with the correct line range. We have these methods because if you tried to do it
4848/// yourself you would screw it up; use them! (And see the source code for
49- /// [`Utf8Converter ::utf8s_to_lines`] if you don't believe us.)
49+ /// [`StringOffsets::utf8s_to_lines`] if you don't believe us.)
5050///
5151/// ## Complexity
5252///
5353/// Most operations run in O(1) time, some require O(log n) time. The memory consumed by this data
5454/// structure is typically less than the memory occupied by the actual content. In the best case,
5555/// it requires ~25% of the content space.
56- pub struct Utf8Converter {
56+ pub struct StringOffsets {
5757 // Vector storing for every line the byte position at which the line starts.
5858 line_begins : Vec < u32 > ,
5959
@@ -79,7 +79,7 @@ pub struct Pos {
7979 /// Zero-indexed line number.
8080 pub line : u32 ,
8181 /// Zero-indexed column number. The units of this field depend on the method that produces the
82- /// value. See [`Utf8Converter ::utf8_to_char_pos`], [`Utf8Converter ::utf8_to_utf16_pos`].
82+ /// value. See [`StringOffsets::utf8_to_char_pos`], [`StringOffsets::utf8_to_utf16_pos`].
8383 pub col : u32 ,
8484}
8585
@@ -101,15 +101,15 @@ pub struct Pos {
101101// Question: Consider whether we should return an empty line range in this case which would
102102// probably be consistent from a mathematical point of view. But then we should also return empty
103103// line ranges for empty character ranges in the middle of a line...
104- impl Utf8Converter {
104+ impl StringOffsets {
105105 /// Collects position information for the given string.
106106 pub fn new ( content : & str ) -> Self {
107107 new_converter ( content. as_bytes ( ) )
108108 }
109109
110110 /// Collects position information for a byte-string.
111111 ///
112- /// If `content` is UTF-8, this is just like [`Utf8Converter ::new`]. Otherwise, the
112+ /// If `content` is UTF-8, this is just like [`StringOffsets::new`]. Otherwise, the
113113 /// conversion methods involving characters will produce unspecified (but memory-safe) results.
114114 pub fn from_bytes ( content : & [ u8 ] ) -> Self {
115115 new_converter ( content)
@@ -293,7 +293,7 @@ impl Utf8Converter {
293293 }
294294}
295295
296- fn new_converter ( content : & [ u8 ] ) -> Utf8Converter {
296+ fn new_converter ( content : & [ u8 ] ) -> StringOffsets {
297297 let mut utf8_builder = BitRankBuilder :: new ( ) ;
298298 let mut utf16_builder = BitRankBuilder :: new ( ) ;
299299 let mut line_builder = BitRankBuilder :: new ( ) ;
@@ -334,7 +334,7 @@ fn new_converter(content: &[u8]) -> Utf8Converter {
334334 line_builder. push ( content. len ( ) - 1 ) ;
335335 }
336336
337- Utf8Converter {
337+ StringOffsets {
338338 line_begins,
339339 utf8_to_line : line_builder. finish ( ) ,
340340 whitespace_only,
@@ -378,7 +378,7 @@ fn utf8_to_utf16_width(content: &[u8]) -> usize {
378378#[ cfg( test) ]
379379mod test {
380380 use super :: is_char_boundary;
381- use crate :: { utf8_to_utf16_width, utf8_width, Pos , Utf8Converter } ;
381+ use crate :: { utf8_to_utf16_width, utf8_width, Pos , StringOffsets } ;
382382
383383 #[ test]
384384 fn test_utf8_char_width ( ) {
@@ -417,7 +417,7 @@ mod test {
417417 let content = r#"a short line.
418418followed by another one.
419419no terminating newline!"# ;
420- let lines = Utf8Converter :: new ( content) ;
420+ let lines = StringOffsets :: new ( content) ;
421421 assert_eq ! ( lines. line_to_utf8s( 0 ) , 0 ..14 ) ;
422422 assert_eq ! ( & content[ 0 ..14 ] , "a short line.\n " ) ;
423423 assert_eq ! ( lines. line_to_utf8s( 1 ) , 14 ..39 ) ;
@@ -463,7 +463,7 @@ no terminating newline!"#;
463463 fn test_convert_ascii ( ) {
464464 let content = r#"line0
465465line1"# ;
466- let lines = Utf8Converter :: new ( content) ;
466+ let lines = StringOffsets :: new ( content) ;
467467 assert_eq ! ( lines. utf8_to_char_pos( 0 ) , pos( 0 , 0 ) ) ;
468468 assert_eq ! ( lines. utf8_to_char_pos( 1 ) , pos( 0 , 1 ) ) ;
469469 assert_eq ! ( lines. utf8_to_char_pos( 6 ) , pos( 1 , 0 ) ) ;
@@ -476,7 +476,7 @@ line1"#;
476476 let content = r#"❤️ line0
477477line1
478478✅ line2"# ;
479- let lines = Utf8Converter :: new ( content) ;
479+ let lines = StringOffsets :: new ( content) ;
480480 assert_eq ! ( lines. utf8_to_char_pos( 0 ) , pos( 0 , 0 ) ) ; // ❤️ takes 6 bytes to represent in utf8 (2 code points)
481481 assert_eq ! ( lines. utf8_to_char_pos( 1 ) , pos( 0 , 0 ) ) ;
482482 assert_eq ! ( lines. utf8_to_char_pos( 2 ) , pos( 0 , 0 ) ) ;
@@ -507,7 +507,7 @@ line1
507507 fn test_small ( ) {
508508 // Á - 2 bytes utf8
509509 let content = r#"❤️ line0 ❤️Á 👋"# ;
510- let lines = Utf8Converter :: new ( content) ;
510+ let lines = StringOffsets :: new ( content) ;
511511 let mut utf16_index = 0 ;
512512 let mut char_index = 0 ;
513513 for ( byte_index, char) in content. char_indices ( ) {
@@ -527,7 +527,7 @@ line1
527527 // ^~~~ utf8: 1 char, 1 byte, utf16: 1 code unit
528528 // ^~~~~ utf8: 1 char, 2 bytes, utf16: 1 code unit
529529 // ^~~~~~ utf8: 2 chars, 3 byte ea., utf16: 2 code units
530- let lines = Utf8Converter :: new ( content) ;
530+ let lines = StringOffsets :: new ( content) ;
531531
532532 // UTF-16 positions
533533 assert_eq ! ( lines. utf8_to_utf16_pos( 0 ) , pos( 0 , 0 ) ) ; // ❤️
@@ -573,7 +573,7 @@ line1
573573 #[ test]
574574 fn test_critical_input_len ( ) {
575575 let content = [ b'a' ; 16384 ] ;
576- let lines = Utf8Converter :: from_bytes ( & content) ;
576+ let lines = StringOffsets :: from_bytes ( & content) ;
577577 assert_eq ! ( lines. utf8_to_utf16_pos( 16384 ) , pos( 1 , 0 ) ) ;
578578 }
579579}
0 commit comments