Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Allow surrogates in str #5587

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits on Mar 26, 2025
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Previous commit
Next commit
Implement fsencode/fsdecode for FsPath
  • Loading branch information
coolreader18 committed Mar 26, 2025
commit 5c2269734402cef5bbd89ebd6358a08faa64697c
3 changes: 0 additions & 3 deletions 3 Lib/test/test_socket.py
Original file line number Diff line number Diff line change
Expand Up @@ -1578,7 +1578,6 @@ def test_getnameinfo(self):
# only IP addresses are allowed
self.assertRaises(OSError, socket.getnameinfo, ('mail.python.org',0), 0)

@unittest.expectedFailureIf(sys.platform != "darwin", "TODO: RUSTPYTHON; socket.gethostbyname_ex")
@unittest.skipUnless(support.is_resource_enabled('network'),
'network is not enabled')
def test_idna(self):
Expand Down Expand Up @@ -5519,8 +5518,6 @@ def testBytesAddr(self):
self.addCleanup(os_helper.unlink, path)
self.assertEqual(self.sock.getsockname(), path)

# TODO: RUSTPYTHON, surrogateescape
@unittest.expectedFailure
def testSurrogateescapeBind(self):
# Test binding to a valid non-ASCII pathname, with the
# non-ASCII bytes supplied using surrogateescape encoding.
Expand Down
13 changes: 9 additions & 4 deletions 13 stdlib/src/socket.rs
Original file line number Diff line number Diff line change
Expand Up @@ -930,10 +930,15 @@ mod _socket {
match family {
#[cfg(unix)]
c::AF_UNIX => {
use crate::vm::function::ArgStrOrBytesLike;
use std::os::unix::ffi::OsStrExt;
let buf = crate::vm::function::ArgStrOrBytesLike::try_from_object(vm, addr)?;
let path = &*buf.borrow_bytes();
socket2::SockAddr::unix(ffi::OsStr::from_bytes(path))
let buf = ArgStrOrBytesLike::try_from_object(vm, addr)?;
let bytes = &*buf.borrow_bytes();
let path = match &buf {
ArgStrOrBytesLike::Buf(_) => ffi::OsStr::from_bytes(bytes).into(),
ArgStrOrBytesLike::Str(s) => vm.fsencode(s)?,
};
socket2::SockAddr::unix(path)
.map_err(|_| vm.new_os_error("AF_UNIX path too long".to_owned()).into())
}
c::AF_INET => {
Expand Down Expand Up @@ -1704,7 +1709,7 @@ mod _socket {
let path = ffi::OsStr::as_bytes(addr.as_pathname().unwrap_or("".as_ref()).as_ref());
let nul_pos = memchr::memchr(b'\0', path).unwrap_or(path.len());
let path = ffi::OsStr::from_bytes(&path[..nul_pos]);
return vm.ctx.new_str(path.to_string_lossy()).into();
return vm.fsdecode(path).into();
}
// TODO: support more address families
(String::new(), 0).to_pyobject(vm)
Expand Down
12 changes: 4 additions & 8 deletions 12 vm/src/function/fspath.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use crate::{
function::PyStr,
protocol::PyBuffer,
};
use std::{ffi::OsStr, path::PathBuf};
use std::{borrow::Cow, ffi::OsStr, path::PathBuf};

#[derive(Clone)]
pub enum FsPath {
Expand Down Expand Up @@ -58,15 +58,11 @@ impl FsPath {
})
}

pub fn as_os_str(&self, vm: &VirtualMachine) -> PyResult<&OsStr> {
pub fn as_os_str(&self, vm: &VirtualMachine) -> PyResult<Cow<'_, OsStr>> {
// TODO: FS encodings
match self {
FsPath::Str(s) => {
// XXX RUSTPYTHON: this is sketchy on windows; it's not guaranteed that its
// OsStr encoding will always be compatible with WTF-8.
Ok(unsafe { OsStr::from_encoded_bytes_unchecked(s.as_wtf8().as_bytes()) })
}
FsPath::Bytes(b) => Self::bytes_as_osstr(b.as_bytes(), vm),
FsPath::Str(s) => vm.fsencode(s),
FsPath::Bytes(b) => Self::bytes_as_osstr(b.as_bytes(), vm).map(Cow::Borrowed),
}
}

Expand Down
34 changes: 10 additions & 24 deletions 34 vm/src/ospath.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,28 +21,14 @@ pub(super) enum OutputMode {
}

impl OutputMode {
pub(super) fn process_path(self, path: impl Into<PathBuf>, vm: &VirtualMachine) -> PyResult {
fn inner(mode: OutputMode, path: PathBuf, vm: &VirtualMachine) -> PyResult {
let path_as_string = |p: PathBuf| {
p.into_os_string().into_string().map_err(|_| {
vm.new_unicode_decode_error(
"Can't convert OS path to valid UTF-8 string".into(),
)
})
};
pub(super) fn process_path(self, path: impl Into<PathBuf>, vm: &VirtualMachine) -> PyObjectRef {
fn inner(mode: OutputMode, path: PathBuf, vm: &VirtualMachine) -> PyObjectRef {
match mode {
OutputMode::String => path_as_string(path).map(|s| vm.ctx.new_str(s).into()),
OutputMode::Bytes => {
#[cfg(any(unix, target_os = "wasi"))]
{
use rustpython_common::os::ffi::OsStringExt;
Ok(vm.ctx.new_bytes(path.into_os_string().into_vec()).into())
}
#[cfg(windows)]
{
path_as_string(path).map(|s| vm.ctx.new_bytes(s.into_bytes()).into())
}
}
OutputMode::String => vm.fsdecode(path).into(),
OutputMode::Bytes => vm
.ctx
.new_bytes(path.into_os_string().into_encoded_bytes())
.into(),
}
}
inner(self, path.into(), vm)
Expand All @@ -59,7 +45,7 @@ impl OsPath {
}

pub(crate) fn from_fspath(fspath: FsPath, vm: &VirtualMachine) -> PyResult<OsPath> {
let path = fspath.as_os_str(vm)?.to_owned();
let path = fspath.as_os_str(vm)?.into_owned();
let mode = match fspath {
FsPath::Str(_) => OutputMode::String,
FsPath::Bytes(_) => OutputMode::Bytes,
Expand Down Expand Up @@ -88,7 +74,7 @@ impl OsPath {
widestring::WideCString::from_os_str(&self.path).map_err(|err| err.to_pyexception(vm))
}

pub fn filename(&self, vm: &VirtualMachine) -> PyResult {
pub fn filename(&self, vm: &VirtualMachine) -> PyObjectRef {
self.mode.process_path(self.path.clone(), vm)
}
}
Expand Down Expand Up @@ -133,7 +119,7 @@ impl From<OsPath> for OsPathOrFd {
impl OsPathOrFd {
pub fn filename(&self, vm: &VirtualMachine) -> PyObjectRef {
match self {
OsPathOrFd::Path(path) => path.filename(vm).unwrap_or_else(|_| vm.ctx.none()),
OsPathOrFd::Path(path) => path.filename(vm),
OsPathOrFd::Fd(fd) => vm.ctx.new_int(*fd).into(),
}
}
Expand Down
7 changes: 6 additions & 1 deletion 7 vm/src/stdlib/codecs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -312,7 +312,12 @@ mod _codecs {

#[pyfunction]
fn utf_8_encode(args: EncodeArgs, vm: &VirtualMachine) -> EncodeResult {
if args.s.is_utf8() {
if args.s.is_utf8()
|| args
.errors
.as_ref()
.is_some_and(|s| s.is(identifier!(vm, surrogatepass)))
{
return Ok((args.s.as_bytes().to_vec(), args.s.byte_len()));
}
do_codec!(utf8::encode, args, vm)
Expand Down
4 changes: 2 additions & 2 deletions 4 vm/src/stdlib/io.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2225,7 +2225,7 @@ mod _io {
*data = None;

let encoding = match args.encoding {
None if vm.state.settings.utf8_mode > 0 => PyStr::from("utf-8").into_ref(&vm.ctx),
None if vm.state.settings.utf8_mode > 0 => identifier!(vm, utf_8).to_owned(),
Some(enc) if enc.as_wtf8() != "locale" => enc,
_ => {
// None without utf8_mode or "locale" encoding
Expand All @@ -2238,7 +2238,7 @@ mod _io {

let errors = args
.errors
.unwrap_or_else(|| PyStr::from("strict").into_ref(&vm.ctx));
.unwrap_or_else(|| identifier!(vm, strict).to_owned());

let has_read1 = vm.get_attribute_opt(buffer.clone(), "read1")?.is_some();
let seekable = vm.call_method(&buffer, "seekable", ())?.try_to_bool(vm)?;
Expand Down
6 changes: 3 additions & 3 deletions 6 vm/src/stdlib/nt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,7 @@ pub(crate) mod module {
.as_ref()
.canonicalize()
.map_err(|e| e.to_pyexception(vm))?;
path.mode.process_path(real, vm)
Ok(path.mode.process_path(real, vm))
}

#[pyfunction]
Expand Down Expand Up @@ -282,7 +282,7 @@ pub(crate) mod module {
}
}
let buffer = widestring::WideCString::from_vec_truncate(buffer);
path.mode.process_path(buffer.to_os_string(), vm)
Ok(path.mode.process_path(buffer.to_os_string(), vm))
}

#[pyfunction]
Expand All @@ -297,7 +297,7 @@ pub(crate) mod module {
return Err(errno_err(vm));
}
let buffer = widestring::WideCString::from_vec_truncate(buffer);
path.mode.process_path(buffer.to_os_string(), vm)
Ok(path.mode.process_path(buffer.to_os_string(), vm))
}

#[pyfunction]
Expand Down
38 changes: 17 additions & 21 deletions 38 vm/src/stdlib/os.rs
Original file line number Diff line number Diff line change
Expand Up @@ -332,7 +332,7 @@ pub(super) mod _os {
};
dir_iter
.map(|entry| match entry {
Ok(entry_path) => path.mode.process_path(entry_path.file_name(), vm),
Ok(entry_path) => Ok(path.mode.process_path(entry_path.file_name(), vm)),
Err(err) => Err(IOErrorBuilder::with_filename(&err, path.clone(), vm)),
})
.collect::<PyResult<_>>()?
Expand All @@ -352,22 +352,18 @@ pub(super) mod _os {
let mut dir =
nix::dir::Dir::from_fd(new_fd).map_err(|e| e.into_pyexception(vm))?;
dir.iter()
.filter_map(|entry| {
entry
.map_err(|e| e.into_pyexception(vm))
.and_then(|entry| {
let fname = entry.file_name().to_bytes();
Ok(match fname {
b"." | b".." => None,
_ => Some(
OutputMode::String
.process_path(ffi::OsStr::from_bytes(fname), vm)?,
),
})
})
.transpose()
.filter_map_ok(|entry| {
let fname = entry.file_name().to_bytes();
match fname {
b"." | b".." => None,
_ => Some(
OutputMode::String
.process_path(ffi::OsStr::from_bytes(fname), vm),
),
}
})
.collect::<PyResult<_>>()?
.collect::<Result<_, _>>()
.map_err(|e| e.into_pyexception(vm))?
}
}
};
Expand Down Expand Up @@ -429,7 +425,7 @@ pub(super) mod _os {
let [] = dir_fd.0;
let path =
fs::read_link(&path).map_err(|err| IOErrorBuilder::with_filename(&err, path, vm))?;
mode.process_path(path, vm)
Ok(mode.process_path(path, vm))
}

#[pyattr]
Expand All @@ -452,12 +448,12 @@ pub(super) mod _os {
impl DirEntry {
#[pygetset]
fn name(&self, vm: &VirtualMachine) -> PyResult {
self.mode.process_path(&self.file_name, vm)
Ok(self.mode.process_path(&self.file_name, vm))
}

#[pygetset]
fn path(&self, vm: &VirtualMachine) -> PyResult {
self.mode.process_path(&self.pathval, vm)
Ok(self.mode.process_path(&self.pathval, vm))
}

fn perform_on_metadata(
Expand Down Expand Up @@ -908,12 +904,12 @@ pub(super) mod _os {

#[pyfunction]
fn getcwd(vm: &VirtualMachine) -> PyResult {
OutputMode::String.process_path(curdir_inner(vm)?, vm)
Ok(OutputMode::String.process_path(curdir_inner(vm)?, vm))
}

#[pyfunction]
fn getcwdb(vm: &VirtualMachine) -> PyResult {
OutputMode::Bytes.process_path(curdir_inner(vm)?, vm)
Ok(OutputMode::Bytes.process_path(curdir_inner(vm)?, vm))
}

#[pyfunction]
Expand Down
16 changes: 4 additions & 12 deletions 16 vm/src/stdlib/sys.rs
Original file line number Diff line number Diff line change
Expand Up @@ -458,21 +458,13 @@ mod sys {
}

#[pyfunction]
fn getfilesystemencoding(_vm: &VirtualMachine) -> String {
// TODO: implement non-utf-8 mode.
"utf-8".to_owned()
fn getfilesystemencoding(vm: &VirtualMachine) -> PyStrRef {
vm.fs_encoding().to_owned()
}

#[cfg(not(windows))]
#[pyfunction]
fn getfilesystemencodeerrors(_vm: &VirtualMachine) -> String {
"surrogateescape".to_owned()
}

#[cfg(windows)]
#[pyfunction]
fn getfilesystemencodeerrors(_vm: &VirtualMachine) -> String {
"surrogatepass".to_owned()
fn getfilesystemencodeerrors(vm: &VirtualMachine) -> PyStrRef {
vm.fs_encode_errors().to_owned()
}

#[pyfunction]
Expand Down
17 changes: 14 additions & 3 deletions 17 vm/src/vm/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ pub struct Context {
}

macro_rules! declare_const_name {
($($name:ident,)*) => {
($($name:ident$(: $s:literal)?,)*) => {
#[derive(Debug, Clone, Copy)]
#[allow(non_snake_case)]
pub struct ConstName {
Expand All @@ -61,11 +61,13 @@ macro_rules! declare_const_name {
impl ConstName {
unsafe fn new(pool: &StringPool, typ: &PyTypeRef) -> Self {
Self {
$($name: unsafe { pool.intern(stringify!($name), typ.clone()) },)*
$($name: unsafe { pool.intern(declare_const_name!(@string $name $($s)?), typ.clone()) },)*
}
}
}
}
};
(@string $name:ident) => { stringify!($name) };
(@string $name:ident $string:literal) => { $string };
}

declare_const_name! {
Expand Down Expand Up @@ -236,6 +238,15 @@ declare_const_name! {
flush,
close,
WarningMessage,
strict,
ignore,
replace,
xmlcharrefreplace,
backslashreplace,
namereplace,
surrogatepass,
surrogateescape,
utf_8: "utf-8",
}

// Basic objects:
Expand Down
Loading
Morty Proxy: This is a proxified and sanitized view of the page; visit the original site.