@@ -129,6 +129,7 @@ mod _io {
129
129
PyMappedThreadMutexGuard , PyRwLock , PyRwLockReadGuard , PyRwLockWriteGuard ,
130
130
PyThreadMutex , PyThreadMutexGuard ,
131
131
} ,
132
+ common:: wtf8:: { Wtf8 , Wtf8Buf } ,
132
133
convert:: ToPyObject ,
133
134
function:: {
134
135
ArgBytesLike , ArgIterable , ArgMemoryBuffer , ArgSize , Either , FuncArgs , IntoFuncArgs ,
@@ -147,7 +148,6 @@ mod _io {
147
148
use crossbeam_utils:: atomic:: AtomicCell ;
148
149
use malachite_bigint:: { BigInt , BigUint } ;
149
150
use num_traits:: ToPrimitive ;
150
- use rustpython_common:: wtf8:: Wtf8Buf ;
151
151
use std:: {
152
152
borrow:: Cow ,
153
153
io:: { self , Cursor , SeekFrom , prelude:: * } ,
@@ -1910,10 +1910,12 @@ mod _io {
1910
1910
impl Newlines {
1911
1911
/// returns position where the new line starts if found, otherwise position at which to
1912
1912
/// continue the search after more is read into the buffer
1913
- fn find_newline ( & self , s : & str ) -> Result < usize , usize > {
1913
+ fn find_newline ( & self , s : & Wtf8 ) -> Result < usize , usize > {
1914
1914
let len = s. len ( ) ;
1915
1915
match self {
1916
- Newlines :: Universal | Newlines :: Lf => s. find ( '\n' ) . map ( |p| p + 1 ) . ok_or ( len) ,
1916
+ Newlines :: Universal | Newlines :: Lf => {
1917
+ s. find ( "\n " . as_ref ( ) ) . map ( |p| p + 1 ) . ok_or ( len)
1918
+ }
1917
1919
Newlines :: Passthrough => {
1918
1920
let bytes = s. as_bytes ( ) ;
1919
1921
memchr:: memchr2 ( b'\n' , b'\r' , bytes)
@@ -1928,7 +1930,7 @@ mod _io {
1928
1930
} )
1929
1931
. ok_or ( len)
1930
1932
}
1931
- Newlines :: Cr => s. find ( '\n' ) . map ( |p| p + 1 ) . ok_or ( len) ,
1933
+ Newlines :: Cr => s. find ( " \n " . as_ref ( ) ) . map ( |p| p + 1 ) . ok_or ( len) ,
1932
1934
Newlines :: Crlf => {
1933
1935
// s[searched..] == remaining
1934
1936
let mut searched = 0 ;
@@ -1993,10 +1995,10 @@ mod _io {
1993
1995
}
1994
1996
}
1995
1997
1996
- fn len_str ( s : & str ) -> Self {
1998
+ fn len_str ( s : & Wtf8 ) -> Self {
1997
1999
Utf8size {
1998
2000
bytes : s. len ( ) ,
1999
- chars : s. chars ( ) . count ( ) ,
2001
+ chars : s. code_points ( ) . count ( ) ,
2000
2002
}
2001
2003
}
2002
2004
}
@@ -2224,7 +2226,7 @@ mod _io {
2224
2226
2225
2227
let encoding = match args. encoding {
2226
2228
None if vm. state . settings . utf8_mode > 0 => PyStr :: from ( "utf-8" ) . into_ref ( & vm. ctx ) ,
2227
- Some ( enc) if enc. as_str ( ) != "locale" => enc,
2229
+ Some ( enc) if enc. as_wtf8 ( ) != "locale" => enc,
2228
2230
_ => {
2229
2231
// None without utf8_mode or "locale" encoding
2230
2232
vm. import ( "locale" , 0 ) ?
@@ -2534,9 +2536,10 @@ mod _io {
2534
2536
* snapshot = Some ( ( cookie. dec_flags , input_chunk. clone ( ) ) ) ;
2535
2537
let decoded = vm. call_method ( decoder, "decode" , ( input_chunk, cookie. need_eof ) ) ?;
2536
2538
let decoded = check_decoded ( decoded, vm) ?;
2537
- let pos_is_valid = decoded
2538
- . as_str ( )
2539
- . is_char_boundary ( cookie. bytes_to_skip as usize ) ;
2539
+ let pos_is_valid = crate :: common:: wtf8:: is_code_point_boundary (
2540
+ decoded. as_wtf8 ( ) ,
2541
+ cookie. bytes_to_skip as usize ,
2542
+ ) ;
2540
2543
textio. set_decoded_chars ( Some ( decoded) ) ;
2541
2544
if !pos_is_valid {
2542
2545
return Err ( vm. new_os_error ( "can't restore logical file position" . to_owned ( ) ) ) ;
@@ -2715,9 +2718,9 @@ mod _io {
2715
2718
} else if chunks. len ( ) == 1 {
2716
2719
chunks. pop ( ) . unwrap ( )
2717
2720
} else {
2718
- let mut ret = String :: with_capacity ( chunks_bytes) ;
2721
+ let mut ret = Wtf8Buf :: with_capacity ( chunks_bytes) ;
2719
2722
for chunk in chunks {
2720
- ret. push_str ( chunk. as_str ( ) )
2723
+ ret. push_wtf8 ( chunk. as_wtf8 ( ) )
2721
2724
}
2722
2725
PyStr :: from ( ret) . into_ref ( & vm. ctx )
2723
2726
}
@@ -2744,7 +2747,7 @@ mod _io {
2744
2747
2745
2748
let char_len = obj. char_len ( ) ;
2746
2749
2747
- let data = obj. as_str ( ) ;
2750
+ let data = obj. as_wtf8 ( ) ;
2748
2751
2749
2752
let replace_nl = match textio. newline {
2750
2753
Newlines :: Lf => Some ( "\n " ) ,
@@ -2753,11 +2756,12 @@ mod _io {
2753
2756
Newlines :: Universal if cfg ! ( windows) => Some ( "\r \n " ) ,
2754
2757
_ => None ,
2755
2758
} ;
2756
- let has_lf = ( replace_nl. is_some ( ) || textio. line_buffering ) && data. contains ( '\n' ) ;
2757
- let flush = textio. line_buffering && ( has_lf || data. contains ( '\r' ) ) ;
2759
+ let has_lf = ( replace_nl. is_some ( ) || textio. line_buffering )
2760
+ && data. contains_code_point ( '\n' . into ( ) ) ;
2761
+ let flush = textio. line_buffering && ( has_lf || data. contains_code_point ( '\r' . into ( ) ) ) ;
2758
2762
let chunk = if let Some ( replace_nl) = replace_nl {
2759
2763
if has_lf {
2760
- PyStr :: from ( data. replace ( '\n' , replace_nl) ) . into_ref ( & vm. ctx )
2764
+ PyStr :: from ( data. replace ( " \n " . as_ref ( ) , replace_nl. as_ref ( ) ) ) . into_ref ( & vm. ctx )
2761
2765
} else {
2762
2766
obj
2763
2767
}
@@ -2834,16 +2838,16 @@ mod _io {
2834
2838
if self . is_full_slice ( ) {
2835
2839
self . 0 . char_len ( )
2836
2840
} else {
2837
- self . slice ( ) . chars ( ) . count ( )
2841
+ self . slice ( ) . code_points ( ) . count ( )
2838
2842
}
2839
2843
}
2840
2844
#[ inline]
2841
2845
fn is_full_slice ( & self ) -> bool {
2842
2846
self . 1 . len ( ) >= self . 0 . byte_len ( )
2843
2847
}
2844
2848
#[ inline]
2845
- fn slice ( & self ) -> & str {
2846
- & self . 0 . as_str ( ) [ self . 1 . clone ( ) ]
2849
+ fn slice ( & self ) -> & Wtf8 {
2850
+ & self . 0 . as_wtf8 ( ) [ self . 1 . clone ( ) ]
2847
2851
}
2848
2852
#[ inline]
2849
2853
fn slice_pystr ( self , vm : & VirtualMachine ) -> PyStrRef {
@@ -2894,24 +2898,24 @@ mod _io {
2894
2898
Some ( remaining) => {
2895
2899
assert_eq ! ( textio. decoded_chars_used. bytes, 0 ) ;
2896
2900
offset_to_buffer = remaining. utf8_len ( ) ;
2897
- let decoded_chars = decoded_chars. as_str ( ) ;
2901
+ let decoded_chars = decoded_chars. as_wtf8 ( ) ;
2898
2902
let line = if remaining. is_full_slice ( ) {
2899
2903
let mut line = remaining. 0 ;
2900
2904
line. concat_in_place ( decoded_chars, vm) ;
2901
2905
line
2902
2906
} else {
2903
2907
let remaining = remaining. slice ( ) ;
2904
2908
let mut s =
2905
- String :: with_capacity ( remaining. len ( ) + decoded_chars. len ( ) ) ;
2906
- s. push_str ( remaining) ;
2907
- s. push_str ( decoded_chars) ;
2909
+ Wtf8Buf :: with_capacity ( remaining. len ( ) + decoded_chars. len ( ) ) ;
2910
+ s. push_wtf8 ( remaining) ;
2911
+ s. push_wtf8 ( decoded_chars) ;
2908
2912
PyStr :: from ( s) . into_ref ( & vm. ctx )
2909
2913
} ;
2910
2914
start = Utf8size :: default ( ) ;
2911
2915
line
2912
2916
}
2913
2917
} ;
2914
- let line_from_start = & line. as_str ( ) [ start. bytes ..] ;
2918
+ let line_from_start = & line. as_wtf8 ( ) [ start. bytes ..] ;
2915
2919
let nl_res = textio. newline . find_newline ( line_from_start) ;
2916
2920
match nl_res {
2917
2921
Ok ( p) | Err ( p) => {
@@ -2922,7 +2926,7 @@ mod _io {
2922
2926
endpos = start
2923
2927
+ Utf8size {
2924
2928
chars : limit - chunked. chars ,
2925
- bytes : crate :: common:: str:: char_range_end (
2929
+ bytes : crate :: common:: str:: codepoint_range_end (
2926
2930
line_from_start,
2927
2931
limit - chunked. chars ,
2928
2932
)
@@ -2963,9 +2967,9 @@ mod _io {
2963
2967
chunked += cur_line. byte_len ( ) ;
2964
2968
chunks. push ( cur_line) ;
2965
2969
}
2966
- let mut s = String :: with_capacity ( chunked) ;
2970
+ let mut s = Wtf8Buf :: with_capacity ( chunked) ;
2967
2971
for chunk in chunks {
2968
- s. push_str ( chunk. slice ( ) )
2972
+ s. push_wtf8 ( chunk. slice ( ) )
2969
2973
}
2970
2974
PyStr :: from ( s) . into_ref ( & vm. ctx )
2971
2975
} else if let Some ( cur_line) = cur_line {
@@ -3100,7 +3104,7 @@ mod _io {
3100
3104
return None ;
3101
3105
}
3102
3106
let decoded_chars = self . decoded_chars . as_ref ( ) ?;
3103
- let avail = & decoded_chars. as_str ( ) [ self . decoded_chars_used . bytes ..] ;
3107
+ let avail = & decoded_chars. as_wtf8 ( ) [ self . decoded_chars_used . bytes ..] ;
3104
3108
if avail. is_empty ( ) {
3105
3109
return None ;
3106
3110
}
@@ -3112,7 +3116,7 @@ mod _io {
3112
3116
( PyStr :: from ( avail) . into_ref ( & vm. ctx ) , avail_chars)
3113
3117
}
3114
3118
} else {
3115
- let s = crate :: common:: str:: get_chars ( avail, 0 ..n) ;
3119
+ let s = crate :: common:: str:: get_codepoints ( avail, 0 ..n) ;
3116
3120
( PyStr :: from ( s) . into_ref ( & vm. ctx ) , n)
3117
3121
} ;
3118
3122
self . decoded_chars_used += Utf8size {
@@ -3142,11 +3146,11 @@ mod _io {
3142
3146
return decoded_chars;
3143
3147
}
3144
3148
// TODO: in-place editing of `str` when refcount == 1
3145
- let decoded_chars_unused = & decoded_chars. as_str ( ) [ chars_pos..] ;
3146
- let mut s = String :: with_capacity ( decoded_chars_unused. len ( ) + append_len) ;
3147
- s. push_str ( decoded_chars_unused) ;
3149
+ let decoded_chars_unused = & decoded_chars. as_wtf8 ( ) [ chars_pos..] ;
3150
+ let mut s = Wtf8Buf :: with_capacity ( decoded_chars_unused. len ( ) + append_len) ;
3151
+ s. push_wtf8 ( decoded_chars_unused) ;
3148
3152
if let Some ( append) = append {
3149
- s. push_str ( append. as_str ( ) )
3153
+ s. push_wtf8 ( append. as_wtf8 ( ) )
3150
3154
}
3151
3155
PyStr :: from ( s) . into_ref ( & vm. ctx )
3152
3156
}
0 commit comments